Skip to content

Commit a355cdd

Browse files
authored
fix: General text memory (#140)
* tmp * feat: simplify general memory * test & format * test & format * test & format * restore type/visibility
1 parent b5daa0f commit a355cdd

File tree

5 files changed

+73
-439
lines changed

5 files changed

+73
-439
lines changed

examples/core_memories/general_textual_memory.py

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from memos.configs.memory import MemoryConfigFactory
22
from memos.memories.factory import MemoryFactory
33

4-
54
config = MemoryConfigFactory(
65
backend="general_text",
76
config={
@@ -36,33 +35,24 @@
3635
{
3736
"memory": "I'm a RUCer, I'm happy.",
3837
"metadata": {
39-
"type": "self-introduction",
40-
"memory_time": "2025-05-26",
38+
"key": "happy RUCer",
4139
"source": "conversation",
42-
"confidence": 90.0,
43-
"entities": ["RUCer"],
4440
"tags": ["happy"],
45-
"visibility": "private",
4641
"updated_at": "2025-05-19T00:00:00",
4742
},
4843
},
4944
{
5045
"memory": "MemOS is awesome!",
5146
"metadata": {
52-
"type": "fact",
53-
"memory_time": "2025-05-19",
47+
"key": "MemOS",
5448
"source": "conversation",
55-
"confidence": 100.0,
56-
"entities": ["MemOS"],
5749
"tags": ["awesome"],
58-
"visibility": "public",
5950
"updated_at": "2025-05-19T00:00:00",
6051
},
6152
},
6253
]
6354
example_id = "a19b6caa-5d59-42ad-8c8a-e4f7118435b4"
6455

65-
6656
print("===== Extract memories =====")
6757
memories = m.extract(
6858
[
@@ -80,7 +70,12 @@
8070
{
8171
"id": example_id,
8272
"memory": "User is Chinese.",
83-
"metadata": {"type": "opinion"},
73+
"metadata": {
74+
"key": "User Nationality",
75+
"source": "conversation",
76+
"tags": ["Nationality"],
77+
"updated_at": "2025-05-18T00:00:00",
78+
},
8479
}
8580
]
8681
)
@@ -103,13 +98,9 @@
10398
"id": example_id,
10499
"memory": "User is Canadian.",
105100
"metadata": {
106-
"type": "opinion",
107-
"confidence": 85,
108-
"memory_time": "2025-05-24",
101+
"key": "User Nationality",
109102
"source": "conversation",
110-
"entities": ["Canadian"],
111-
"tags": ["happy"],
112-
"visibility": "private",
103+
"tags": ["Nationality"],
113104
"updated_at": "2025-05-19T00:00:00",
114105
},
115106
},

src/memos/memories/textual/general.py

Lines changed: 35 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@
1212
from memos.log import get_logger
1313
from memos.memories.textual.base import BaseTextMemory
1414
from memos.memories.textual.item import TextualMemoryItem
15+
from memos.templates.mem_reader_prompts import SIMPLE_STRUCT_MEM_READER_PROMPT
1516
from memos.types import MessageList
1617
from memos.vec_dbs.factory import QdrantVecDB, VecDBFactory
1718
from memos.vec_dbs.item import VecDBItem
1819

19-
2020
logger = get_logger(__name__)
2121

2222

@@ -36,7 +36,7 @@ def __init__(self, config: GeneralTextMemoryConfig):
3636
stop=stop_after_attempt(3),
3737
retry=retry_if_exception_type(json.JSONDecodeError),
3838
before_sleep=lambda retry_state: logger.warning(
39-
EXTRACTION_RETRY_LOG.format(
39+
"Extracting memory failed due to JSON decode error: {error}, Attempt retry: {attempt_number} / {max_attempt_number}".format(
4040
error=retry_state.outcome.exception(),
4141
attempt_number=retry_state.attempt_number,
4242
max_attempt_number=3,
@@ -52,14 +52,27 @@ def extract(self, messages: MessageList) -> list[TextualMemoryItem]:
5252
Returns:
5353
List of TextualMemoryItem objects representing the extracted memories.
5454
"""
55-
str_messages = json.dumps(messages)
56-
user_query = EXTRACTION_PROMPT_PART_1 + EXTRACTION_PROMPT_PART_2.format(
57-
messages=str_messages
55+
56+
str_messages = "\n".join(
57+
[message["role"] + ":" + message["content"] for message in messages]
5858
)
59-
response = self.extractor_llm.generate([{"role": "user", "content": user_query}])
60-
raw_extracted_memories = json.loads(response)
59+
60+
prompt = SIMPLE_STRUCT_MEM_READER_PROMPT.replace("${conversation}", str_messages)
61+
messages = [{"role": "user", "content": prompt}]
62+
response_text = self.extractor_llm.generate(messages)
63+
response_json = self.parse_json_result(response_text)
64+
6165
extracted_memories = [
62-
TextualMemoryItem(**memory_dict) for memory_dict in raw_extracted_memories
66+
TextualMemoryItem(
67+
memory=memory_dict["value"],
68+
metadata={
69+
"key": memory_dict["key"],
70+
"source": "conversation",
71+
"tags": memory_dict["tags"],
72+
"updated_at": datetime.now().isoformat(),
73+
},
74+
)
75+
for memory_dict in response_json["memory list"]
6376
]
6477

6578
return extracted_memories
@@ -206,83 +219,17 @@ def _embed_one_sentence(self, sentence: str) -> list[float]:
206219
"""Embed a single sentence."""
207220
return self.embedder.embed([sentence])[0]
208221

209-
210-
EXTRACTION_PROMPT_PART_1 = f"""You are a memory extractor. Your task is to extract memories from the given messages.
211-
* You will receive a list of messages, each with a role (user or assistant) and content.
212-
* Your job is to extract memories related to the user's long-term goals, interests, and emotional states.
213-
* Each memory should be a dictionary with the following keys:
214-
- "memory": The content of the memory (string). Rephrase the content if necessary.
215-
- "metadata": A dictionary containing additional information about the memory.
216-
* The metadata dictionary should include:
217-
- "type": The type of memory (string), e.g., "procedure", "fact", "event", "opinion", etc.
218-
- "memory_time": The time the memory occurred or refers to (string). Must be in standard `YYYY-MM-DD` format. Relative expressions such as "yesterday" or "tomorrow" are not allowed.
219-
- "source": The origin of the memory (string), e.g., `"conversation"`, `"retrieved"`, `"web"`, `"file"`.
220-
- "confidence": A numeric score (float between 0 and 100) indicating how certain you are about the accuracy or reliability of the memory.
221-
- "entities": A list of key entities (array of strings) mentioned in the memory, e.g., people, places, organizations, e.g., `["Alice", "Paris", "OpenAI"]`.
222-
- "tags": A list of keywords or thematic labels (array of strings) associated with the memory for categorization or retrieval, e.g., `["travel", "health", "project-x"]`.
223-
- "visibility": The accessibility scope of the memory (string), e.g., `"private"`, `"public"`, `"session"`, determining who or what contexts can access it.
224-
- "updated_at": The timestamp of the last modification to the memory (string). Useful for tracking memory freshness or change history. Format: ISO 8601 or natural language.
225-
* Current date and time is {datetime.now().isoformat()}.
226-
* Only return the list of memories in JSON format.
227-
* Do not include any explanations
228-
* Do not include any extra text
229-
* Do not include code blocks (```json```)
230-
231-
## Example
232-
233-
### Input
234-
235-
[
236-
{{"role": "user", "content": "I plan to visit Paris next week."}},
237-
{{"role": "assistant", "content": "Paris is a beautiful city with many attractions."}},
238-
{{"role": "user", "content": "I love the Eiffel Tower."}},
239-
{{"role": "assistant", "content": "The Eiffel Tower is a must-see landmark in Paris."}}
240-
]
241-
242-
### Output
243-
244-
[
245-
{{
246-
"memory": "The user plans to visit Paris on 05-26-2025.",
247-
"metadata": {{
248-
"type": "event",
249-
"memory_time": "2025-05-26",
250-
"source": "conversation",
251-
"confidence": 90.0,
252-
"entities": ["Paris"],
253-
"tags": ["travel", "plans"],
254-
"visibility": "private",
255-
"updated_at": "2025-05-19T00:00:00"
256-
}}
257-
}},
258-
{{
259-
"memory": "The user loves the Eiffel Tower.",
260-
"metadata": {{
261-
"type": "opinion",
262-
"memory_time": "2025-05-19",
263-
"source": "conversation",
264-
"confidence": 100.0,
265-
"entities": ["Eiffel Tower"],
266-
"tags": ["opinions", "landmarks"],
267-
"visibility": "session",
268-
"updated_at": "2025-05-19T00:00:00"
269-
}}
270-
}}
271-
]
272-
273-
"""
274-
275-
EXTRACTION_PROMPT_PART_2 = """
276-
## Query
277-
278-
### Input
279-
280-
{messages}
281-
282-
### Output
283-
284-
"""
285-
286-
EXTRACTION_RETRY_LOG = """Extracting memory failed due to JSON decode error: {error},
287-
Attempt retry: {attempt_number} / {max_attempt_number}
288-
"""
222+
def parse_json_result(self, response_text):
    """Parse the JSON object embedded in an LLM response.

    The response may put free text or Markdown code fences around the JSON,
    so everything before the first ``{`` is discarded and any triple-backtick
    fence markers are removed. If the remaining text does not end with ``}``,
    one is appended as a best-effort repair of a truncated response.

    Args:
        response_text: Raw text returned by the LLM.

    Returns:
        The decoded JSON value, or an empty dict when the response contains
        no ``{`` or the cleaned text cannot be decoded.
    """
    json_start = response_text.find("{")
    if json_start == -1:
        # str.find returns -1 when "{" is absent. Slicing from -1 would keep
        # only the last character, and an empty response would then raise an
        # uncaught IndexError on the trailing-brace check below.
        logger.warning(
            "Failed to parse LLM response as JSON: no JSON object found"
            f"\nRaw response:\n{response_text}"
        )
        return {}

    candidate = response_text[json_start:].replace("```", "").strip()
    if not candidate.endswith("}"):
        # Best-effort repair for output that was cut off before the final brace.
        candidate += "}"

    try:
        return json.loads(candidate)
    except json.JSONDecodeError as e:
        logger.warning(f"Failed to parse LLM response as JSON: {e}\nRaw response:\n{candidate}")
        return {}

src/memos/memories/textual/item.py

Lines changed: 4 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -27,23 +27,14 @@ class TextualMemoryMetadata(BaseModel):
2727
default="activated",
2828
description="The status of the memory, e.g., 'activated', 'archived', 'deleted'.",
2929
)
30-
type: Literal["procedure", "fact", "event", "opinion", "topic", "reasoning"] | None = Field(
31-
default=None
32-
)
33-
memory_time: str | None = Field(
34-
default=None,
35-
description='The time the memory occurred or refers to. Must be in standard `YYYY-MM-DD` format. Relative expressions such as "yesterday" or "tomorrow" are not allowed.',
36-
)
37-
source: Literal["conversation", "retrieved", "web", "file"] | None = Field(
38-
default=None, description="The origin of the memory"
39-
)
30+
type: str | None = Field(default=None)
31+
key: str | None = Field(default=None, description="Memory key or title.")
4032
confidence: float | None = Field(
4133
default=None,
4234
description="A numeric score (float between 0 and 100) indicating how certain you are about the accuracy or reliability of the memory.",
4335
)
44-
entities: list[str] | None = Field(
45-
default=None,
46-
description='A list of key entities mentioned in the memory, e.g., people, places, organizations, e.g., `["Alice", "Paris", "OpenAI"]`.',
36+
source: Literal["conversation", "retrieved", "web", "file"] | None = Field(
37+
default=None, description="The origin of the memory"
4738
)
4839
tags: list[str] | None = Field(
4940
default=None,
@@ -59,23 +50,6 @@ class TextualMemoryMetadata(BaseModel):
5950

6051
model_config = ConfigDict(extra="allow")
6152

62-
@field_validator("memory_time")
63-
@classmethod
64-
def validate_memory_time(cls, v):
65-
try:
66-
if v:
67-
datetime.strptime(v, "%Y-%m-%d")
68-
except ValueError as e:
69-
raise ValueError("Invalid date format. Use YYYY-MM-DD.") from e
70-
return v
71-
72-
@field_validator("confidence")
73-
@classmethod
74-
def validate_confidence(cls, v):
75-
if v is not None and (v < 0 or v > 100):
76-
raise ValueError("Confidence must be between 0 and 100.")
77-
return v
78-
7953
def __str__(self) -> str:
8054
"""Pretty string representation of the metadata."""
8155
meta = self.model_dump(exclude_none=True)
@@ -88,7 +62,6 @@ class TreeNodeTextualMemoryMetadata(TextualMemoryMetadata):
8862
memory_type: Literal["WorkingMemory", "LongTermMemory", "UserMemory"] = Field(
8963
default="WorkingMemory", description="Memory lifecycle type."
9064
)
91-
key: str | None = Field(default=None, description="Memory key or title.")
9265
sources: list[str] | None = Field(
9366
default=None, description="Multiple origins of the memory (e.g., URLs, notes)."
9467
)
@@ -148,7 +121,6 @@ class TextualMemoryItem(BaseModel):
148121

149122
model_config = ConfigDict(extra="forbid")
150123

151-
@field_validator("id")
152124
@classmethod
153125
def validate_id(cls, v):
154126
try:

0 commit comments

Comments
 (0)