Skip to content

Commit 220b83c

Browse files
authored
Merge branch 'dev' into feat/mos-product-api-0716
2 parents ae89138 + a355cdd commit 220b83c

File tree

10 files changed

+86
-449
lines changed

10 files changed

+86
-449
lines changed

examples/core_memories/general_textual_memory.py

Lines changed: 10 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
from memos.configs.memory import MemoryConfigFactory
22
from memos.memories.factory import MemoryFactory
33

4-
54
config = MemoryConfigFactory(
65
backend="general_text",
76
config={
@@ -36,33 +35,24 @@
3635
{
3736
"memory": "I'm a RUCer, I'm happy.",
3837
"metadata": {
39-
"type": "self-introduction",
40-
"memory_time": "2025-05-26",
38+
"key": "happy RUCer",
4139
"source": "conversation",
42-
"confidence": 90.0,
43-
"entities": ["RUCer"],
4440
"tags": ["happy"],
45-
"visibility": "private",
4641
"updated_at": "2025-05-19T00:00:00",
4742
},
4843
},
4944
{
5045
"memory": "MemOS is awesome!",
5146
"metadata": {
52-
"type": "fact",
53-
"memory_time": "2025-05-19",
47+
"key": "MemOS",
5448
"source": "conversation",
55-
"confidence": 100.0,
56-
"entities": ["MemOS"],
5749
"tags": ["awesome"],
58-
"visibility": "public",
5950
"updated_at": "2025-05-19T00:00:00",
6051
},
6152
},
6253
]
6354
example_id = "a19b6caa-5d59-42ad-8c8a-e4f7118435b4"
6455

65-
6656
print("===== Extract memories =====")
6757
memories = m.extract(
6858
[
@@ -80,7 +70,12 @@
8070
{
8171
"id": example_id,
8272
"memory": "User is Chinese.",
83-
"metadata": {"type": "opinion"},
73+
"metadata": {
74+
"key": "User Nationality",
75+
"source": "conversation",
76+
"tags": ["Nationality"],
77+
"updated_at": "2025-05-18T00:00:00",
78+
},
8479
}
8580
]
8681
)
@@ -103,13 +98,9 @@
10398
"id": example_id,
10499
"memory": "User is Canadian.",
105100
"metadata": {
106-
"type": "opinion",
107-
"confidence": 85,
108-
"memory_time": "2025-05-24",
101+
"key": "User Nationality",
109102
"source": "conversation",
110-
"entities": ["Canadian"],
111-
"tags": ["happy"],
112-
"visibility": "private",
103+
"tags": ["Nationality"],
113104
"updated_at": "2025-05-19T00:00:00",
114105
},
115106
},

poetry.lock

Lines changed: 1 addition & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ dependencies = [
4444
"sqlalchemy (>=2.0.41,<3.0.0)", # SQL toolkit
4545
"scikit-learn (>=1.7.0,<2.0.0)", # Machine learning
4646
"fastmcp (>=2.10.5,<3.0.0)",
47+
"python-dateutil (>=2.9.0.post0,<3.0.0)",
4748
]
4849

4950
[project.urls]

src/memos/mem_reader/simple_struct.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -58,9 +58,13 @@ def _process_chat_data(self, scene_data_info, info):
5858
metadata=TreeNodeTextualMemoryMetadata(
5959
user_id=info.get("user_id"),
6060
session_id=info.get("session_id"),
61-
memory_type=memory_i_raw.get("memory_type", ""),
61+
memory_type=memory_i_raw.get("memory_type", "")
62+
.replace("长期记忆", "LongTermMemory")
63+
.replace("用户记忆", "UserMemory"),
6264
status="activated",
63-
tags=memory_i_raw.get("tags", ""),
65+
tags=memory_i_raw.get("tags", [])
66+
if type(memory_i_raw.get("tags", [])) is list
67+
else [],
6468
key=memory_i_raw.get("key", ""),
6569
embedding=self.embedder.embed([memory_i_raw.get("value", "")])[0],
6670
usage=[],
@@ -214,7 +218,7 @@ def _process_doc_data(self, scene_data_info, info):
214218
session_id=info.get("session_id"),
215219
memory_type="LongTermMemory",
216220
status="activated",
217-
tags=chunk_res["tags"],
221+
tags=chunk_res["tags"] if type(chunk_res["tags"]) is list else [],
218222
key=chunk_res["key"],
219223
embedding=self.embedder.embed([chunk_res["value"]])[0],
220224
usage=[],

src/memos/memories/textual/general.py

Lines changed: 35 additions & 88 deletions
Original file line number | Diff line number | Diff line change
@@ -12,11 +12,11 @@
1212
from memos.log import get_logger
1313
from memos.memories.textual.base import BaseTextMemory
1414
from memos.memories.textual.item import TextualMemoryItem
15+
from memos.templates.mem_reader_prompts import SIMPLE_STRUCT_MEM_READER_PROMPT
1516
from memos.types import MessageList
1617
from memos.vec_dbs.factory import QdrantVecDB, VecDBFactory
1718
from memos.vec_dbs.item import VecDBItem
1819

19-
2020
logger = get_logger(__name__)
2121

2222

@@ -36,7 +36,7 @@ def __init__(self, config: GeneralTextMemoryConfig):
3636
stop=stop_after_attempt(3),
3737
retry=retry_if_exception_type(json.JSONDecodeError),
3838
before_sleep=lambda retry_state: logger.warning(
39-
EXTRACTION_RETRY_LOG.format(
39+
"Extracting memory failed due to JSON decode error: {error}, Attempt retry: {attempt_number} / {max_attempt_number}".format(
4040
error=retry_state.outcome.exception(),
4141
attempt_number=retry_state.attempt_number,
4242
max_attempt_number=3,
@@ -52,14 +52,27 @@ def extract(self, messages: MessageList) -> list[TextualMemoryItem]:
5252
Returns:
5353
List of TextualMemoryItem objects representing the extracted memories.
5454
"""
55-
str_messages = json.dumps(messages)
56-
user_query = EXTRACTION_PROMPT_PART_1 + EXTRACTION_PROMPT_PART_2.format(
57-
messages=str_messages
55+
56+
str_messages = "\n".join(
57+
[message["role"] + ":" + message["content"] for message in messages]
5858
)
59-
response = self.extractor_llm.generate([{"role": "user", "content": user_query}])
60-
raw_extracted_memories = json.loads(response)
59+
60+
prompt = SIMPLE_STRUCT_MEM_READER_PROMPT.replace("${conversation}", str_messages)
61+
messages = [{"role": "user", "content": prompt}]
62+
response_text = self.extractor_llm.generate(messages)
63+
response_json = self.parse_json_result(response_text)
64+
6165
extracted_memories = [
62-
TextualMemoryItem(**memory_dict) for memory_dict in raw_extracted_memories
66+
TextualMemoryItem(
67+
memory=memory_dict["value"],
68+
metadata={
69+
"key": memory_dict["key"],
70+
"source": "conversation",
71+
"tags": memory_dict["tags"],
72+
"updated_at": datetime.now().isoformat(),
73+
},
74+
)
75+
for memory_dict in response_json["memory list"]
6376
]
6477

6578
return extracted_memories
@@ -206,83 +219,17 @@ def _embed_one_sentence(self, sentence: str) -> list[float]:
206219
"""Embed a single sentence."""
207220
return self.embedder.embed([sentence])[0]
208221

209-
210-
EXTRACTION_PROMPT_PART_1 = f"""You are a memory extractor. Your task is to extract memories from the given messages.
211-
* You will receive a list of messages, each with a role (user or assistant) and content.
212-
* Your job is to extract memories related to the user's long-term goals, interests, and emotional states.
213-
* Each memory should be a dictionary with the following keys:
214-
- "memory": The content of the memory (string). Rephrase the content if necessary.
215-
- "metadata": A dictionary containing additional information about the memory.
216-
* The metadata dictionary should include:
217-
- "type": The type of memory (string), e.g., "procedure", "fact", "event", "opinion", etc.
218-
- "memory_time": The time the memory occurred or refers to (string). Must be in standard `YYYY-MM-DD` format. Relative expressions such as "yesterday" or "tomorrow" are not allowed.
219-
- "source": The origin of the memory (string), e.g., `"conversation"`, `"retrieved"`, `"web"`, `"file"`.
220-
- "confidence": A numeric score (float between 0 and 100) indicating how certain you are about the accuracy or reliability of the memory.
221-
- "entities": A list of key entities (array of strings) mentioned in the memory, e.g., people, places, organizations, e.g., `["Alice", "Paris", "OpenAI"]`.
222-
- "tags": A list of keywords or thematic labels (array of strings) associated with the memory for categorization or retrieval, e.g., `["travel", "health", "project-x"]`.
223-
- "visibility": The accessibility scope of the memory (string), e.g., `"private"`, `"public"`, `"session"`, determining who or what contexts can access it.
224-
- "updated_at": The timestamp of the last modification to the memory (string). Useful for tracking memory freshness or change history. Format: ISO 8601 or natural language.
225-
* Current date and time is {datetime.now().isoformat()}.
226-
* Only return the list of memories in JSON format.
227-
* Do not include any explanations
228-
* Do not include any extra text
229-
* Do not include code blocks (```json```)
230-
231-
## Example
232-
233-
### Input
234-
235-
[
236-
{{"role": "user", "content": "I plan to visit Paris next week."}},
237-
{{"role": "assistant", "content": "Paris is a beautiful city with many attractions."}},
238-
{{"role": "user", "content": "I love the Eiffel Tower."}},
239-
{{"role": "assistant", "content": "The Eiffel Tower is a must-see landmark in Paris."}}
240-
]
241-
242-
### Output
243-
244-
[
245-
{{
246-
"memory": "The user plans to visit Paris on 05-26-2025.",
247-
"metadata": {{
248-
"type": "event",
249-
"memory_time": "2025-05-26",
250-
"source": "conversation",
251-
"confidence": 90.0,
252-
"entities": ["Paris"],
253-
"tags": ["travel", "plans"],
254-
"visibility": "private",
255-
"updated_at": "2025-05-19T00:00:00"
256-
}}
257-
}},
258-
{{
259-
"memory": "The user loves the Eiffel Tower.",
260-
"metadata": {{
261-
"type": "opinion",
262-
"memory_time": "2025-05-19",
263-
"source": "conversation",
264-
"confidence": 100.0,
265-
"entities": ["Eiffel Tower"],
266-
"tags": ["opinions", "landmarks"],
267-
"visibility": "session",
268-
"updated_at": "2025-05-19T00:00:00"
269-
}}
270-
}}
271-
]
272-
273-
"""
274-
275-
EXTRACTION_PROMPT_PART_2 = """
276-
## Query
277-
278-
### Input
279-
280-
{messages}
281-
282-
### Output
283-
284-
"""
285-
286-
EXTRACTION_RETRY_LOG = """Extracting memory failed due to JSON decode error: {error},
287-
Attempt retry: {attempt_number} / {max_attempt_number}
288-
"""
222+
def parse_json_result(self, response_text):
223+
try:
224+
json_start = response_text.find("{")
225+
response_text = response_text[json_start:]
226+
response_text = response_text.replace("```", "").strip()
227+
if response_text[-1] != "}":
228+
response_text += "}"
229+
response_json = json.loads(response_text)
230+
return response_json
231+
except json.JSONDecodeError as e:
232+
logger.warning(
233+
f"Failed to parse LLM response as JSON: {e}\nRaw response:\n{response_text}"
234+
)
235+
return {}

src/memos/memories/textual/item.py

Lines changed: 4 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -27,23 +27,14 @@ class TextualMemoryMetadata(BaseModel):
2727
default="activated",
2828
description="The status of the memory, e.g., 'activated', 'archived', 'deleted'.",
2929
)
30-
type: Literal["procedure", "fact", "event", "opinion", "topic", "reasoning"] | None = Field(
31-
default=None
32-
)
33-
memory_time: str | None = Field(
34-
default=None,
35-
description='The time the memory occurred or refers to. Must be in standard `YYYY-MM-DD` format. Relative expressions such as "yesterday" or "tomorrow" are not allowed.',
36-
)
37-
source: Literal["conversation", "retrieved", "web", "file"] | None = Field(
38-
default=None, description="The origin of the memory"
39-
)
30+
type: str | None = Field(default=None)
31+
key: str | None = Field(default=None, description="Memory key or title.")
4032
confidence: float | None = Field(
4133
default=None,
4234
description="A numeric score (float between 0 and 100) indicating how certain you are about the accuracy or reliability of the memory.",
4335
)
44-
entities: list[str] | None = Field(
45-
default=None,
46-
description='A list of key entities mentioned in the memory, e.g., people, places, organizations, e.g., `["Alice", "Paris", "OpenAI"]`.',
36+
source: Literal["conversation", "retrieved", "web", "file"] | None = Field(
37+
default=None, description="The origin of the memory"
4738
)
4839
tags: list[str] | None = Field(
4940
default=None,
@@ -59,23 +50,6 @@ class TextualMemoryMetadata(BaseModel):
5950

6051
model_config = ConfigDict(extra="allow")
6152

62-
@field_validator("memory_time")
63-
@classmethod
64-
def validate_memory_time(cls, v):
65-
try:
66-
if v:
67-
datetime.strptime(v, "%Y-%m-%d")
68-
except ValueError as e:
69-
raise ValueError("Invalid date format. Use YYYY-MM-DD.") from e
70-
return v
71-
72-
@field_validator("confidence")
73-
@classmethod
74-
def validate_confidence(cls, v):
75-
if v is not None and (v < 0 or v > 100):
76-
raise ValueError("Confidence must be between 0 and 100.")
77-
return v
78-
7953
def __str__(self) -> str:
8054
"""Pretty string representation of the metadata."""
8155
meta = self.model_dump(exclude_none=True)
@@ -88,7 +62,6 @@ class TreeNodeTextualMemoryMetadata(TextualMemoryMetadata):
8862
memory_type: Literal["WorkingMemory", "LongTermMemory", "UserMemory"] = Field(
8963
default="WorkingMemory", description="Memory lifecycle type."
9064
)
91-
key: str | None = Field(default=None, description="Memory key or title.")
9265
sources: list[str] | None = Field(
9366
default=None, description="Multiple origins of the memory (e.g., URLs, notes)."
9467
)
@@ -148,7 +121,6 @@ class TextualMemoryItem(BaseModel):
148121

149122
model_config = ConfigDict(extra="forbid")
150123

151-
@field_validator("id")
152124
@classmethod
153125
def validate_id(cls, v):
154126
try:

src/memos/memories/textual/tree_text_memory/organize/conflict.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import re
33

44
from datetime import datetime
5-
5+
from dateutil import parser
66
from memos.embedders.base import BaseEmbedder
77
from memos.graph_dbs.neo4j import Neo4jGraphDB
88
from memos.llms.base import BaseLLM
@@ -133,8 +133,8 @@ def _hard_update(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem)
133133
"""
134134
Hard update: compare updated_at, keep the newer one, overwrite the older one's metadata.
135135
"""
136-
time_a = datetime.fromisoformat(memory_a.metadata.updated_at)
137-
time_b = datetime.fromisoformat(memory_b.metadata.updated_at)
136+
time_a = parser.isoparse(memory_a.metadata.updated_at)
137+
time_b = parser.isoparse(memory_b.metadata.updated_at)
138138

139139
newer_mem = memory_a if time_a >= time_b else memory_b
140140
older_mem = memory_b if time_a >= time_b else memory_a

src/memos/templates/mem_reader_prompts.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -150,7 +150,7 @@
150150
"summary": "Tom is currently focused on managing a new project with a tight schedule. After a team meeting on June 25, 2025, he realized the original deadline of December 15 might not be feasible due to backend delays. Concerned about insufficient testing time, he welcomed Jerry’s suggestion of proposing an extension. Tom plans to raise the idea of shifting the deadline to January 5, 2026 in the next morning’s meeting. His actions reflect both stress about timelines and a proactive, team-oriented problem-solving approach."
151151
}
152152
153-
Another Example in Chinese (注意: 你的输出必须和输入的user语言一致):
153+
Another Example in Chinese (注意: 当user的语言为中文时,你就需要也输出中文):
154154
{
155155
"memory list": [
156156
{

0 commit comments

Comments
 (0)