Skip to content

Commit 7ceb930

Browse files
abrookinsclaude
andcommitted
Implement thread-aware contextual grounding for memory extraction
- Add debounce mechanism to prevent frequent re-extraction of same threads - Implement thread-aware extraction that processes full conversation context - Update working memory promotion to use new extraction approach - Resolve cross-message pronoun references by providing complete context - Add comprehensive tests for thread-aware grounding functionality 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <[email protected]>
1 parent 2cbfe81 commit 7ceb930

File tree

4 files changed

+608
-10
lines changed

4 files changed

+608
-10
lines changed

agent_memory_server/long_term_memory.py

Lines changed: 151 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,138 @@
9898

9999
logger = logging.getLogger(__name__)
100100

101+
# Debounce configuration for thread-aware extraction
102+
EXTRACTION_DEBOUNCE_TTL = 300 # 5 minutes
103+
EXTRACTION_DEBOUNCE_KEY_PREFIX = "extraction_debounce"
104+
105+
106+
async def should_extract_session_thread(session_id: str, redis: Redis) -> bool:
107+
"""
108+
Check if enough time has passed since last thread-aware extraction for this session.
109+
110+
This implements a debounce mechanism to avoid constantly re-extracting memories
111+
from the same conversation thread as new messages arrive.
112+
113+
Args:
114+
session_id: The session ID to check
115+
redis: Redis client
116+
117+
Returns:
118+
True if extraction should proceed, False if debounced
119+
"""
120+
121+
debounce_key = f"{EXTRACTION_DEBOUNCE_KEY_PREFIX}:{session_id}"
122+
123+
# Check if debounce key exists
124+
exists = await redis.exists(debounce_key)
125+
if not exists:
126+
# Set debounce key with TTL to prevent extraction for the next period
127+
await redis.setex(debounce_key, EXTRACTION_DEBOUNCE_TTL, "extracting")
128+
logger.info(
129+
f"Starting thread-aware extraction for session {session_id} (debounce set for {EXTRACTION_DEBOUNCE_TTL}s)"
130+
)
131+
return True
132+
133+
remaining_ttl = await redis.ttl(debounce_key)
134+
logger.info(
135+
f"Skipping thread-aware extraction for session {session_id} (debounced, {remaining_ttl}s remaining)"
136+
)
137+
return False
138+
139+
140+
async def extract_memories_from_session_thread(
141+
session_id: str,
142+
namespace: str | None = None,
143+
user_id: str | None = None,
144+
llm_client: OpenAIClientWrapper | AnthropicClientWrapper | None = None,
145+
) -> list[MemoryRecord]:
146+
"""
147+
Extract memories from the entire conversation thread in working memory.
148+
149+
This provides full conversational context for proper contextual grounding,
150+
allowing pronouns and references to be resolved across the entire thread.
151+
152+
Args:
153+
session_id: The session ID to extract memories from
154+
namespace: Optional namespace for the memories
155+
user_id: Optional user ID for the memories
156+
llm_client: Optional LLM client for extraction
157+
158+
Returns:
159+
List of extracted memory records with proper contextual grounding
160+
"""
161+
from agent_memory_server.working_memory import get_working_memory
162+
163+
# Get the complete working memory thread
164+
working_memory = await get_working_memory(
165+
session_id=session_id, namespace=namespace, user_id=user_id
166+
)
167+
168+
if not working_memory or not working_memory.messages:
169+
logger.info(f"No working memory messages found for session {session_id}")
170+
return []
171+
172+
# Build full conversation context from all messages
173+
conversation_messages = []
174+
for msg in working_memory.messages:
175+
# Include role and content for better context
176+
role_prefix = (
177+
f"[{msg.role.upper()}]: " if hasattr(msg, "role") and msg.role else ""
178+
)
179+
conversation_messages.append(f"{role_prefix}{msg.content}")
180+
181+
full_conversation = "\n".join(conversation_messages)
182+
183+
logger.info(
184+
f"Extracting memories from {len(working_memory.messages)} messages in session {session_id}"
185+
)
186+
logger.debug(
187+
f"Full conversation context length: {len(full_conversation)} characters"
188+
)
189+
190+
# Use the enhanced extraction prompt with contextual grounding
191+
from agent_memory_server.extraction import DISCRETE_EXTRACTION_PROMPT
192+
193+
client = llm_client or await get_model_client(settings.generation_model)
194+
195+
try:
196+
response = await client.create_chat_completion(
197+
model=settings.generation_model,
198+
prompt=DISCRETE_EXTRACTION_PROMPT.format(
199+
message=full_conversation, top_k_topics=settings.top_k_topics
200+
),
201+
response_format={"type": "json_object"},
202+
)
203+
204+
extraction_result = json.loads(response.choices[0].message.content)
205+
memories_data = extraction_result.get("memories", [])
206+
207+
logger.info(
208+
f"Extracted {len(memories_data)} memories from session thread {session_id}"
209+
)
210+
211+
# Convert to MemoryRecord objects
212+
extracted_memories = []
213+
for memory_data in memories_data:
214+
memory = MemoryRecord(
215+
id=str(ULID()),
216+
text=memory_data["text"],
217+
memory_type=memory_data.get("type", "semantic"),
218+
topics=memory_data.get("topics", []),
219+
entities=memory_data.get("entities", []),
220+
session_id=session_id,
221+
namespace=namespace,
222+
user_id=user_id,
223+
discrete_memory_extracted="t", # Mark as extracted
224+
)
225+
extracted_memories.append(memory)
226+
227+
return extracted_memories
228+
229+
except Exception as e:
230+
logger.error(f"Error extracting memories from session thread {session_id}: {e}")
231+
return []
232+
101233

102234
async def extract_memory_structure(memory: MemoryRecord):
103235
redis = await get_redis_conn()
@@ -1124,23 +1256,32 @@ async def promote_working_memory_to_long_term(
11241256
updated_memories = []
11251257
extracted_memories = []
11261258

1127-
# Find messages that haven't been extracted yet for discrete memory extraction
1259+
# Thread-aware discrete memory extraction with debouncing
11281260
unextracted_messages = [
11291261
message
11301262
for message in current_working_memory.messages
11311263
if message.discrete_memory_extracted == "f"
11321264
]
11331265

11341266
if settings.enable_discrete_memory_extraction and unextracted_messages:
1135-
logger.info(f"Extracting memories from {len(unextracted_messages)} messages")
1136-
extracted_memories = await extract_memories_from_messages(
1137-
messages=unextracted_messages,
1138-
session_id=session_id,
1139-
user_id=user_id,
1140-
namespace=namespace,
1141-
)
1142-
for message in unextracted_messages:
1143-
message.discrete_memory_extracted = "t"
1267+
# Check if we should run thread-aware extraction (debounced)
1268+
if await should_extract_session_thread(session_id, redis):
1269+
logger.info(
1270+
f"Running thread-aware extraction from {len(current_working_memory.messages)} total messages in session {session_id}"
1271+
)
1272+
extracted_memories = await extract_memories_from_session_thread(
1273+
session_id=session_id,
1274+
namespace=namespace,
1275+
user_id=user_id,
1276+
)
1277+
1278+
# Mark ALL messages in the session as extracted since we processed the full thread
1279+
for message in current_working_memory.messages:
1280+
message.discrete_memory_extracted = "t"
1281+
1282+
else:
1283+
logger.info(f"Skipping extraction for session {session_id} - debounced")
1284+
extracted_memories = []
11441285

11451286
for memory in current_working_memory.memories:
11461287
if memory.persisted_at is None:

agent_memory_server/mcp.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,27 @@ async def create_long_term_memories(
181181
182182
This tool saves memories contained in the payload for future retrieval.
183183
184+
CONTEXTUAL GROUNDING REQUIREMENTS:
185+
When creating memories, you MUST resolve all contextual references to their concrete referents:
186+
187+
1. PRONOUNS: Replace ALL pronouns (he/she/they/him/her/them/his/hers/theirs) with actual person names
188+
- "He prefers Python" → "John prefers Python" (if "he" refers to John)
189+
- "Her expertise is valuable" → "Sarah's expertise is valuable" (if "her" refers to Sarah)
190+
191+
2. TEMPORAL REFERENCES: Convert relative time expressions to absolute dates/times
192+
- "yesterday" → "2024-03-15" (if today is March 16, 2024)
193+
- "last week" → "March 4-10, 2024" (if current week is March 11-17, 2024)
194+
195+
3. SPATIAL REFERENCES: Resolve place references to specific locations
196+
- "there" → "San Francisco office" (if referring to SF office)
197+
- "here" → "the main conference room" (if referring to specific room)
198+
199+
4. DEFINITE REFERENCES: Resolve definite articles to specific entities
200+
- "the project" → "the customer portal redesign project"
201+
- "the bug" → "the authentication timeout issue"
202+
203+
MANDATORY: Never create memories with unresolved pronouns, vague time references, or unclear spatial references. Always ground contextual references using the full conversation context.
204+
184205
MEMORY TYPES - SEMANTIC vs EPISODIC:
185206
186207
There are two main types of long-term memories you can create:

0 commit comments

Comments
 (0)