Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions agent_memory_server/extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from agent_memory_server.logging import get_logger
from agent_memory_server.models import MemoryRecord
from agent_memory_server.utils.datetime import parse_iso8601_datetime
from agent_memory_server.utils.llm_json import parse_llm_json
from agent_memory_server.utils.tag_codec import sanitize_tag_values


Expand Down Expand Up @@ -152,7 +153,7 @@ async def extract_entities_llm(text: str) -> list[str]:
response_format={"type": "json_object"},
)
try:
entities = json.loads(response.content).get("entities", [])
entities = parse_llm_json(response.content).get("entities", [])
except (json.JSONDecodeError, KeyError):
Copy link

Copilot AI Mar 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The exception handler includes KeyError, but .get("entities", []) won’t raise KeyError. The more likely failure mode here (especially with the new lenient parser) is AttributeError/TypeError if the parsed JSON isn’t a dict (e.g., model returns a top-level list). Catching those and treating them as a parse failure would avoid unexpected retries/crashes.

Suggested change
except (json.JSONDecodeError, KeyError):
except (json.JSONDecodeError, AttributeError, TypeError):

Copilot uses AI. Check for mistakes.
logger.error(f"Error decoding NER JSON: {response.content}")
entities = []
Expand Down Expand Up @@ -191,7 +192,7 @@ async def extract_topics_llm(
response_format={"type": "json_object"},
)
try:
topics = json.loads(response.content).get("topics", [])
topics = parse_llm_json(response.content).get("topics", [])
except (json.JSONDecodeError, KeyError):
logger.error(f"Error decoding topics JSON: {response.content}")
topics = []
Expand Down
9 changes: 5 additions & 4 deletions agent_memory_server/memory_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
secure_format_prompt,
validate_custom_prompt,
)
from agent_memory_server.utils.llm_json import parse_llm_json


logger = get_logger(__name__)
Expand Down Expand Up @@ -175,7 +176,7 @@ async def extract_memories(
response_format={"type": "json_object"},
)
try:
response_data = json.loads(response.content)
response_data = parse_llm_json(response.content)
Copy link

Copilot AI Mar 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parse_llm_json returns Any, but this code assumes it’s a dict (response_data.get(...)) and only catches JSONDecodeError. If the model returns a valid non-object JSON value (list/string/etc.), this will raise AttributeError and won’t be logged as a decode problem. Consider validating response_data is a dict (or catching AttributeError/TypeError and re-raising as a decode/validation error) so retries and logs behave consistently.

Suggested change
response_data = parse_llm_json(response.content)
response_data = parse_llm_json(response.content)
if not isinstance(response_data, dict):
logger.error(
"Expected JSON object with 'memories' key, got %s: %r",
type(response_data).__name__,
response.content,
)
# Treat non-object top-level JSON as a decode/validation error
raise json.JSONDecodeError(
"Expected JSON object at top level",
response.content,
0,
)

Copilot uses AI. Check for mistakes.
return response_data.get("memories", [])
except json.JSONDecodeError:
logger.error(f"Error decoding JSON: {response.content}")
Expand Down Expand Up @@ -267,7 +268,7 @@ async def extract_memories(
response_format={"type": "json_object"},
)
try:
response_data = json.loads(response.content)
response_data = parse_llm_json(response.content)
return response_data.get("memories", [])
except json.JSONDecodeError:
logger.error(f"Error decoding JSON: {response.content}")
Expand Down Expand Up @@ -360,7 +361,7 @@ async def extract_memories(
response_format={"type": "json_object"},
)
try:
response_data = json.loads(response.content)
response_data = parse_llm_json(response.content)
return response_data.get("memories", [])
except json.JSONDecodeError:
logger.error(f"Error decoding JSON: {response.content}")
Expand Down Expand Up @@ -444,7 +445,7 @@ async def extract_memories(
response_format={"type": "json_object"},
)
try:
response_data = json.loads(response.content)
response_data = parse_llm_json(response.content)
memories = response_data.get("memories", [])

# Filter and validate output memories for security
Expand Down
50 changes: 50 additions & 0 deletions agent_memory_server/utils/llm_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""Helpers for parsing JSON-shaped LLM responses."""

from __future__ import annotations

import json
import re
from collections.abc import Iterator
from typing import Any


_CODE_FENCE_RE = re.compile(r"```(?:json)?\s*(.*?)\s*```", re.IGNORECASE | re.DOTALL)
_JSON_START_RE = re.compile(r"[{\[]")

Comment on lines +11 to +13
Copy link

Copilot AI Mar 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parse_llm_json’s fallback scan matches both { and [ (_JSON_START_RE = r"[{\[]"), so it can successfully parse a top-level list/scalar from a prose-wrapped response. All current call sites assume a dict and call .get(...), which will raise AttributeErrorfor non-dict JSON. Consider restricting the scan to{(since callers requestjson_object`) and/or validating the parsed value is a dict and raising a decode/validation error otherwise.

Copilot uses AI. Check for mistakes.

def parse_llm_json(content: str) -> Any:
"""Parse JSON from raw, fenced, or prose-wrapped LLM responses."""
normalized = content.strip()
decoder = json.JSONDecoder()

try:
return decoder.decode(normalized)
except json.JSONDecodeError as error:
original_error = error

for candidate in _iter_json_candidates(normalized):
try:
parsed, _ = decoder.raw_decode(candidate)
return parsed
except json.JSONDecodeError:
continue

raise original_error


def _iter_json_candidates(content: str) -> Iterator[str]:
"""Yield likely JSON payloads embedded within an LLM response."""
seen: set[str] = set()

for match in _CODE_FENCE_RE.finditer(content):
candidate = match.group(1).strip()
if candidate and candidate not in seen:
seen.add(candidate)
yield candidate

# Fall back to scanning for embedded JSON objects/arrays inside prose.
for match in _JSON_START_RE.finditer(content):
candidate = content[match.start() :].lstrip()
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potential Bug: Hash collision with different whitespace

The deduplication logic using seen.add(candidate) may fail to skip actual duplicates when the same JSON content has different whitespace. For example:

# These would be treated as different candidates even though they're the same JSON:
candidate1 = '{"key": "value"}'  
candidate2 = '{  "key":   "value"  }'

This could cause:

  1. Unnecessary parsing attempts for the same JSON with different formatting
  2. Incorrect behavior if the first attempt fails due to trailing characters but a later whitespace variant would succeed

Suggested improvement:

Consider normalizing candidates before deduplication, or use a more robust approach:

def _iter_json_candidates(content: str) -> Iterator[str]:
    """Yield likely JSON payloads embedded within an LLM response."""
    seen: set[str] = set()

    for match in _CODE_FENCE_RE.finditer(content):
        candidate = match.group(1).strip()
        # Normalize whitespace for deduplication
        normalized = ' '.join(candidate.split())
        if candidate and normalized not in seen:
            seen.add(normalized)
            yield candidate

    # Fall back to scanning for embedded JSON objects/arrays inside prose.
    for match in _JSON_START_RE.finditer(content):
        candidate = content[match.start() :].lstrip()
        normalized = ' '.join(candidate.split())
        if candidate and normalized not in seen:
            seen.add(normalized)
            yield candidate

Alternatively, since you're already using a try/catch for parsing, you might consider removing the deduplication entirely and just rely on finding the first successfully parsed JSON, as the performance cost is likely minimal.

Comment on lines +46 to +47
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From what I'm seeing, it looks like this regex approach would produce a lot of results if your JSON contained a list of dictionaries (or if you had a dictionary that contained lists).

>>> p = re.compile(r"[{\[]")
>>> s = '```json\n[{"a": 1}, {"a": 2}, {"a": 3}]\n```'	
>>> for match in p.finditer(s):
... 	print(s[match.start() :].lstrip())
	
[{"a": 1}, {"a": 2}, {"a": 3}]
\```
{"a": 1}, {"a": 2}, {"a": 3}]
\```
{"a": 2}, {"a": 3}]
\```
{"a": 3}]
\```

It probably won't matter since parse_llm_json should be able to parse an earlier match and not reach these awkward ones, which wouldn't be parseable anyway, but it leaves a small door open to strange behaviour.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch

if candidate and candidate not in seen:
seen.add(candidate)
yield candidate
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Uncaught AttributeError when parser extracts non-dict JSON

Medium Severity

The _JSON_START_RE fallback matches every { and [ in the content. If prose contains a small valid JSON fragment before the intended object (e.g., "I found [3] entities: {\"entities\": [...]}") then raw_decode succeeds on [3], returning a list. All callers immediately call .get(...) on the result, which raises AttributeError — not caught by the existing except (json.JSONDecodeError, KeyError) handlers. Previously json.loads would have raised JSONDecodeError on this input, which was properly handled. This regression converts a graceful fallback into an uncaught exception that triggers three tenacity retries followed by a RetryError.

Additional Locations (2)
Fix in Cursor Fix in Web

77 changes: 54 additions & 23 deletions examples/agent_memory_server_interactive_guide.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,7 @@
" await asyncio.sleep(poll_interval)\n",
" return False\n",
"\n",
"\n",
"indexed = await wait_for_indexing(client, USER_ID, NAMESPACE)\n",
"print(f\"Memories indexed: {indexed}\")\n",
"\n",
Expand All @@ -622,10 +623,9 @@
" \"limit\": 5,\n",
" # distance_threshold: Lower = stricter when set. If omitted, the server\n",
" # uses no distance filter (distance_threshold=None) for broader KNN recall.\n",
" \"user_id\": {\"eq\": USER_ID} # Only search Nitin's memories\n",
" }\n",
")\n",
"\n"
" \"user_id\": {\"eq\": USER_ID}, # Only search Nitin's memories\n",
" },\n",
")"
]
},
{
Expand Down Expand Up @@ -877,10 +877,26 @@
],
"source": [
"messages = [\n",
" MemoryMessage(role=\"user\", content=\"I'm planning a trip to Japan next month!\", created_at=datetime.now(UTC)),\n",
" MemoryMessage(role=\"assistant\", content=\"Exciting! Based on your preferences, I know you enjoy hiking and vegetarian food. Japan has amazing options for both!\", created_at=datetime.now(UTC)),\n",
" MemoryMessage(role=\"user\", content=\"Yes! I'd love to hike Mount Fuji and find good vegetarian ramen.\", created_at=datetime.now(UTC)),\n",
" MemoryMessage(role=\"assistant\", content=\"Perfect! I'll remember your interest in Mount Fuji. For vegetarian ramen, Kyoto has excellent options.\", created_at=datetime.now(UTC))\n",
" MemoryMessage(\n",
" role=\"user\",\n",
" content=\"I'm planning a trip to Japan next month!\",\n",
" created_at=datetime.now(UTC),\n",
" ),\n",
" MemoryMessage(\n",
" role=\"assistant\",\n",
" content=\"Exciting! Based on your preferences, I know you enjoy hiking and vegetarian food. Japan has amazing options for both!\",\n",
" created_at=datetime.now(UTC),\n",
" ),\n",
" MemoryMessage(\n",
" role=\"user\",\n",
" content=\"Yes! I'd love to hike Mount Fuji and find good vegetarian ramen.\",\n",
" created_at=datetime.now(UTC),\n",
" ),\n",
" MemoryMessage(\n",
" role=\"assistant\",\n",
" content=\"Perfect! I'll remember your interest in Mount Fuji. For vegetarian ramen, Kyoto has excellent options.\",\n",
" created_at=datetime.now(UTC),\n",
" ),\n",
"]\n",
"\n",
"updated_memory = WorkingMemory(\n",
Expand Down Expand Up @@ -1527,10 +1543,26 @@
"source": [
"# Step 2: Just store the conversation - extraction happens automatically!\n",
"conversation = [\n",
" MemoryMessage(role=\"user\", content=\"I'm Nitin. I'm planning a hiking trip to Japan and need vegetarian food options.\", created_at=datetime.now(UTC)),\n",
" MemoryMessage(role=\"assistant\", content=\"Great choice! Japan has amazing hiking trails and excellent vegetarian cuisine.\", created_at=datetime.now(UTC)),\n",
" MemoryMessage(role=\"user\", content=\"I prefer nice hotels with good amenities, not too fancy but comfortable. All depends on the budget.\", created_at=datetime.now(UTC)),\n",
" MemoryMessage(role=\"assistant\", content=\"Noted! I'll remember your preference for comfortable mid-tier accommodations.\", created_at=datetime.now(UTC))\n",
" MemoryMessage(\n",
" role=\"user\",\n",
" content=\"I'm Nitin. I'm planning a hiking trip to Japan and need vegetarian food options.\",\n",
" created_at=datetime.now(UTC),\n",
" ),\n",
" MemoryMessage(\n",
" role=\"assistant\",\n",
" content=\"Great choice! Japan has amazing hiking trails and excellent vegetarian cuisine.\",\n",
" created_at=datetime.now(UTC),\n",
" ),\n",
" MemoryMessage(\n",
" role=\"user\",\n",
" content=\"I prefer nice hotels with good amenities, not too fancy but comfortable. All depends on the budget.\",\n",
" created_at=datetime.now(UTC),\n",
" ),\n",
" MemoryMessage(\n",
" role=\"assistant\",\n",
" content=\"Noted! I'll remember your preference for comfortable mid-tier accommodations.\",\n",
" created_at=datetime.now(UTC),\n",
" ),\n",
"]\n",
"\n",
"working_memory_update = WorkingMemory(\n",
Expand Down Expand Up @@ -2324,11 +2356,11 @@
" recency=RecencyConfig(\n",
" recency_boost=True,\n",
" semantic_weight=0.6, # Lower semantic weight\n",
" recency_weight=0.4, # Higher recency weight\n",
" recency_weight=0.4, # Higher recency weight\n",
" half_life_last_access_days=3.0, # Faster decay\n",
" half_life_created_days=14.0\n",
" half_life_created_days=14.0,\n",
" ),\n",
" limit=5\n",
" limit=5,\n",
")\n",
"\n",
"print(f\"Found {results_recency.total} memories with recency boost:\")\n",
Expand Down Expand Up @@ -2362,7 +2394,7 @@
" namespace={\"eq\": \"travel_agent\"},\n",
" user_id={\"eq\": \"nitin\"},\n",
" recency=RecencyConfig(recency_boost=False), # Pure vector similarity\n",
" limit=5\n",
" limit=5,\n",
")\n",
"\n",
"print(f\"Pure semantic search found {results_pure_semantic.total} memories:\")\n",
Expand Down Expand Up @@ -2436,18 +2468,15 @@
" text=\"vacation\",\n",
" namespace={\"eq\": \"travel_agent\"},\n",
" search_mode=SearchModeEnum.SEMANTIC, # or just \"semantic\"\n",
" limit=3\n",
" limit=3,\n",
")\n",
"print(f\"SEMANTIC search for 'vacation' ({semantic_results.total} results):\")\n",
"for mem in semantic_results.memories:\n",
" print(f\" [{mem.score:.3f}] {mem.text[:60]}...\")\n",
"\n",
"# Keyword search - exact term matching\n",
"keyword_results = await client.search_long_term_memory(\n",
" text=\"vegetarian\",\n",
" namespace={\"eq\": \"travel_agent\"},\n",
" search_mode=\"keyword\",\n",
" limit=3\n",
" text=\"vegetarian\", namespace={\"eq\": \"travel_agent\"}, search_mode=\"keyword\", limit=3\n",
")\n",
"print(f\"\\nKEYWORD search for 'vegetarian' ({keyword_results.total} results):\")\n",
"for mem in keyword_results.memories:\n",
Expand All @@ -2459,9 +2488,11 @@
" namespace={\"eq\": \"travel_agent\"},\n",
" search_mode=\"hybrid\",\n",
" hybrid_alpha=0.7, # 0.7 = 70% semantic, 30% keyword weight\n",
" limit=3\n",
" limit=3,\n",
")\n",
"print(\n",
" f\"\\nHYBRID search for 'vegetarian food options' ({hybrid_results.total} results):\"\n",
")\n",
"print(f\"\\nHYBRID search for 'vegetarian food options' ({hybrid_results.total} results):\")\n",
"for mem in hybrid_results.memories:\n",
" print(f\" [{mem.score:.3f}] {mem.text[:60]}...\")"
]
Expand Down
128 changes: 128 additions & 0 deletions tests/test_issue_236.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
"""Regression tests for GitHub issue #236."""

import json
from unittest.mock import AsyncMock, Mock, patch

import pytest

from agent_memory_server.extraction import extract_entities_llm, extract_topics_llm
from agent_memory_server.llm import ChatCompletionResponse
from agent_memory_server.memory_strategies import (
CustomMemoryStrategy,
DiscreteMemoryStrategy,
SummaryMemoryStrategy,
UserPreferencesMemoryStrategy,
)
from agent_memory_server.utils.llm_json import parse_llm_json


class TestIssue236LlmJsonParsing:
    """Verify JSON parsing tolerates markdown fences and wrapper prose."""

    # Three representative wrappers: bare JSON, a ```json fence, and a fence
    # surrounded by conversational prose on both sides.
    @pytest.mark.parametrize(
        ("content", "expected"),
        [
            (
                '{"entities": ["Redis", "Snowflake"]}',
                {"entities": ["Redis", "Snowflake"]},
            ),
            (
                '```json\n{"entities": ["Redis", "Snowflake"]}\n```',
                {"entities": ["Redis", "Snowflake"]},
            ),
            (
                'Here are the extracted topics:\n```json\n{"topics": ["data engineering", "recommendation engines"]}\n```\nI found these topics in the text.',
                {"topics": ["data engineering", "recommendation engines"]},
            ),
        ],
    )
    def test_parse_llm_json_handles_wrapped_content(self, content, expected):
        """The helper should recover valid JSON from common LLM wrappers."""
        assert parse_llm_json(content) == expected

    def test_parse_llm_json_raises_for_invalid_content(self):
        """Invalid non-JSON content should still fail fast."""
        # The lenient parser must not degrade to returning None/garbage;
        # total parse failure still surfaces as JSONDecodeError.
        with pytest.raises(json.JSONDecodeError):
            parse_llm_json("This response contains no JSON payload at all.")

    @pytest.mark.asyncio
    # patch() substitutes an AsyncMock automatically because the patched
    # attribute is an async method, so the awaited call works as-is.
    @patch("agent_memory_server.extraction.LLMClient.create_chat_completion")
    async def test_extract_entities_llm_parses_fenced_json(self, mock_llm):
        """Entity extraction should work when the model wraps JSON in fences."""
        mock_llm.return_value = Mock(
            content='```json\n{"entities": ["Redis", "Snowflake"]}\n```'
        )

        entities = await extract_entities_llm("Redis works with Snowflake.")

        # Order is not part of the contract; compare as a set.
        assert set(entities) == {"Redis", "Snowflake"}
        mock_llm.assert_called_once()

    @pytest.mark.asyncio
    @patch("agent_memory_server.extraction.LLMClient.create_chat_completion")
    async def test_extract_topics_llm_parses_prose_wrapped_json(self, mock_llm):
        """Topic extraction should work when commentary surrounds the JSON."""
        mock_llm.return_value = Mock(
            content='Here are the extracted topics:\n```json\n{"topics": ["data engineering", "recommendation engines", "streaming"]}\n```\nI found these topics in the text.'
        )

        topics = await extract_topics_llm(
            "Kafka pipelines support recommendations.", num_topics=2
        )

        # Only the first two of the three returned topics are expected —
        # extract_topics_llm presumably truncates to num_topics; confirm
        # against the implementation in extraction.py.
        assert topics == ["data engineering", "recommendation engines"]
        mock_llm.assert_called_once()


@pytest.mark.asyncio
class TestIssue236MemoryStrategies:
    """Verify memory extraction strategies accept wrapped JSON responses."""

    # One (strategy factory, wrapped-JSON response) pair per strategy class.
    # Factories are lambdas so each parametrized run builds a fresh instance.
    @pytest.mark.parametrize(
        ("strategy_builder", "response_content"),
        [
            (
                lambda: DiscreteMemoryStrategy(),
                '```json\n{"memories": [{"type": "semantic", "text": "User prefers Redis", "topics": ["preferences"], "entities": ["User", "Redis"], "event_date": null}]}\n```',
            ),
            (
                lambda: SummaryMemoryStrategy(max_summary_length=100),
                'Summary generated below.\n```json\n{"memories": [{"type": "semantic", "text": "User discussed Redis adoption", "topics": ["redis"], "entities": ["User", "Redis"]}]}\n```\nDone.',
            ),
            (
                lambda: UserPreferencesMemoryStrategy(),
                '```json\n{"memories": [{"type": "semantic", "text": "User prefers dark mode", "topics": ["preferences"], "entities": ["User"]}]}\n```',
            ),
            (
                lambda: CustomMemoryStrategy(
                    custom_prompt="Extract memories from: {message}"
                ),
                'Custom extraction result:\n```json\n{"memories": [{"type": "semantic", "text": "User prefers async updates", "topics": ["communication"], "entities": ["User"]}]}\n```',
            ),
        ],
    )
    async def test_strategies_parse_wrapped_json(
        self, strategy_builder, response_content
    ):
        """All strategy variants should parse wrapped JSON without retry failure."""
        strategy = strategy_builder()
        # Build a realistic completion object so the strategy's response
        # handling (token accounting, finish_reason, etc.) is exercised.
        response = ChatCompletionResponse(
            content=response_content,
            finish_reason="stop",
            prompt_tokens=100,
            completion_tokens=50,
            total_tokens=150,
            model="gpt-4o-mini",
        )

        with patch(
            "agent_memory_server.memory_strategies.LLMClient.create_chat_completion",
            new_callable=AsyncMock,
            return_value=response,
        ) as mock_create:
            memories = await strategy.extract_memories("Store this memory.")

        # Each fixture encodes exactly one memory whose text starts with
        # "User"; a retry storm (the issue-236 failure mode) would have
        # raised before reaching these assertions.
        assert len(memories) == 1
        assert memories[0]["type"] == "semantic"
        assert memories[0]["text"].startswith("User")
        mock_create.assert_called_once()
Loading