Commit fd40607

Merge branch 'dev-20260309-v2.0.9' of github.com:MemTensor/MemOS into dev-20260309-v2.0.9
2 parents: 3342799 + fe5d8da

13 files changed: +113 / -18 lines

src/memos/api/handlers/memory_handler.py

Lines changed: 40 additions & 1 deletion
@@ -320,7 +320,10 @@ def handle_delete_memories(delete_mem_req: DeleteMemoryRequest, naive_mem_cube:
     Now unified to delete from text_mem only (includes preferences).
     """
     logger.info(
-        f"[Delete memory request] writable_cube_ids: {delete_mem_req.writable_cube_ids}, memory_ids: {delete_mem_req.memory_ids}"
+        "[Delete memory request] writable_cube_ids: %s, memory_ids: %s, auto_cleanup_working: %s",
+        delete_mem_req.writable_cube_ids,
+        delete_mem_req.memory_ids,
+        getattr(delete_mem_req, "auto_cleanup_working", False),
     )
     # Validate that only one of memory_ids, file_ids, or filter is provided
     provided_params = [

@@ -335,6 +338,31 @@ def handle_delete_memories(delete_mem_req: DeleteMemoryRequest, naive_mem_cube:
     )

     try:
+        working_ids_to_delete: set[str] = set()
+        # When deleting by explicit memory_ids and auto_cleanup_working is enabled,
+        # collect related WorkingMemory ids from working_binding
+        if delete_mem_req.memory_ids is not None and getattr(
+            delete_mem_req, "auto_cleanup_working", False
+        ):
+            try:
+                memories = naive_mem_cube.text_mem.get_by_ids(memory_ids=delete_mem_req.memory_ids)
+            except Exception as e:
+                logger.warning("Failed to fetch memories before delete for working cleanup: %s", e)
+                memories = []
+
+            if memories:
+                import re
+
+                pattern = re.compile(r"\[working_binding:([0-9a-fA-F-]{36})\]")
+                for mem in memories:
+                    metadata = mem.get("metadata") or {}
+                    bg = metadata.get("background") or ""
+                    if not isinstance(bg, str):
+                        continue
+                    match = pattern.search(bg)
+                    if match:
+                        working_ids_to_delete.add(match.group(1))
+
         if delete_mem_req.memory_ids is not None:
             # Unified deletion from text_mem (includes preferences)
             naive_mem_cube.text_mem.delete_by_memory_ids(delete_mem_req.memory_ids)

@@ -344,6 +372,17 @@ def handle_delete_memories(delete_mem_req: DeleteMemoryRequest, naive_mem_cube:
             )
         elif delete_mem_req.filter is not None:
             naive_mem_cube.text_mem.delete_by_filter(filter=delete_mem_req.filter)
+
+        # After main deletion, optionally clean up related WorkingMemory nodes.
+        if working_ids_to_delete:
+            try:
+                logger.info(
+                    "Auto-cleanup WorkingMemory nodes after delete, count=%d",
+                    len(working_ids_to_delete),
+                )
+                naive_mem_cube.text_mem.delete_by_memory_ids(list(working_ids_to_delete))
+            except Exception as e:
+                logger.warning("Failed to auto-cleanup WorkingMemory nodes: %s, Pass", e)
     except Exception as e:
         logger.error(f"Failed to delete memories: {e}", exc_info=True)
         return DeleteMemoryResponse(
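
Note: the cleanup keys off a "[working_binding:<uuid>]" marker expected to appear in a memory's background metadata. A minimal standalone sketch of that extraction step, with hypothetical memory dicts standing in for what text_mem.get_by_ids returns:

    import re

    # Same pattern as the handler above: a 36-character UUID inside
    # a "[working_binding:<uuid>]" marker in the background field.
    WORKING_BINDING = re.compile(r"\[working_binding:([0-9a-fA-F-]{36})\]")

    # Hypothetical records standing in for text_mem.get_by_ids(...) output.
    memories = [
        {"metadata": {"background": "summary [working_binding:123e4567-e89b-12d3-a456-426614174000]"}},
        {"metadata": {"background": "no binding here"}},
        {"metadata": None},
    ]

    working_ids = set()
    for mem in memories:
        background = (mem.get("metadata") or {}).get("background") or ""
        if isinstance(background, str) and (m := WORKING_BINDING.search(background)):
            working_ids.add(m.group(1))

    print(working_ids)  # {'123e4567-e89b-12d3-a456-426614174000'}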

src/memos/api/product_models.py

Lines changed: 7 additions & 0 deletions
@@ -858,6 +858,13 @@ class DeleteMemoryRequest(BaseRequest):
     memory_ids: list[str] | None = Field(None, description="Memory IDs")
     file_ids: list[str] | None = Field(None, description="File IDs")
     filter: dict[str, Any] | None = Field(None, description="Filter for the memory")
+    auto_cleanup_working: bool | None = Field(
+        False,
+        description=(
+            "(Internal) Whether to automatically delete related WorkingMemory nodes "
+            "based on working_binding metadata when deleting by memory_ids."
+        ),
+    )


 class SuggestionRequest(BaseRequest):
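
For callers the new field is opt-in and defaults to False. An illustrative delete-by-ids payload; the cube and memory ids are placeholders, and other DeleteMemoryRequest fields are unchanged by this commit:

    # Hypothetical request body: delete two memories and let the handler
    # also remove any WorkingMemory nodes bound to them.
    delete_payload = {
        "writable_cube_ids": ["cube_demo"],   # placeholder cube id
        "memory_ids": ["mem_a1", "mem_b2"],   # placeholder memory ids
        "auto_cleanup_working": True,         # internal flag added above
    }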

src/memos/graph_dbs/polardb.py

Lines changed: 8 additions & 2 deletions
@@ -254,8 +254,13 @@ def _get_connection(self):
         if not self._semaphore.acquire(timeout=timeout):
             logger.warning(f"Timeout waiting for connection slot ({timeout}s)")
             raise RuntimeError(
-                f"Connection pool busy: could not acquire a slot within {timeout}s (all connections in use)."
+                f"Connection pool busy: acquire a slot within {timeout}s (all connections in use)."
             )
+        logger.info(
+            "Connection pool usage: %s/%s",
+            self.connection_pool.maxconn - self._semaphore._value,
+            self.connection_pool.maxconn,
+        )
         conn = None
         broken = False

@@ -264,7 +269,7 @@ def _get_connection(self):
             logger.debug(f"Acquired connection {id(conn)} from pool")
             conn.autocommit = True
             with conn.cursor() as cur:
-                cur.execute("SELECT 1")
+                cur.execute(f'SET search_path = {self.db_name}_graph, ag_catalog, "$user", public;')
             yield conn
         except Exception as e:
             broken = True

@@ -1777,6 +1782,7 @@ def search_by_fulltext(
         )
         where_clause_cte = f"WHERE {' AND '.join(where_with_q)}" if where_with_q else ""
         query = f"""
+            /*+ Set(max_parallel_workers_per_gather 0) */
             WITH q AS (SELECT to_tsquery('{tsquery_config}', %s) AS fq)
             SELECT {select_cols}
             FROM "{self.db_name}_graph"."Memory" m

src/memos/mem_reader/multi_modal_struct.py

Lines changed: 1 addition & 1 deletion
@@ -819,7 +819,7 @@ def _process_one_item(
                 if result:
                     fine_memory_items.extend(result)
             except Exception as e:
-                logger.error(f"[MultiModalFine] worker error: {e}")
+                logger.error(f"[MultiModalFine] worker error: {e} {traceback.format_exc()}")

         # related preceding and following rawfilememories
         fine_memory_items = self._relate_preceding_following_rawfile_memories(fine_memory_items)

src/memos/mem_reader/read_multi_modal/file_content_parser.py

Lines changed: 7 additions & 2 deletions
@@ -367,7 +367,7 @@ def create_source(
     ) -> SourceMessage:
         """Create SourceMessage from file content part."""
         if isinstance(message, dict):
-            file_info = message.get("file", {})
+            file_info = message.get("file", {}) or {}
             source_dict = {
                 "type": "file",
                 "doc_path": file_info.get("filename") or file_info.get("file_id", ""),

@@ -470,7 +470,7 @@ def parse_fast(
         file_data = file_info.get("file_data", "")
         file_id = file_info.get("file_id", "")
         filename = file_info.get("filename", "")
-        file_url_flag = False
+        file_url_flag = bool(file_info)
         # Build content string based on available information
         content_parts = []

@@ -651,6 +651,9 @@ def parse_fine(
         file_id = file_info.get("file_id", "")
         filename = file_info.get("filename", "")

+        # Whether to keep full file_info in sources
+        file_url_flag = bool(file_info)
+
         # Extract custom_tags from kwargs (for LLM extraction)
         custom_tags = kwargs.get("custom_tags")

@@ -683,6 +686,7 @@ def parse_fine(
             url_str = file_data[1:] if file_data.startswith("@") else file_data

             if url_str.startswith(("http://", "https://")):
+                file_url_flag = True
                 parsed_text, temp_file_path, is_markdown = self._handle_url(
                     url_str, filename
                 )

@@ -793,6 +797,7 @@ def _make_memory_item(
             chunk_index=chunk_idx,
             chunk_total=total_chunks,
             chunk_content=chunk_content,
+            file_url_flag=file_url_flag,
         )
         return TextualMemoryItem(
             memory=value,
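
The flag now mirrors whether a file payload was present at all, and parse_fine additionally forces it on when file_data is an http(s) URL, before passing it into _make_memory_item. A rough standalone sketch of that logic with placeholder values:

    # Mirrors the parse_fine flow above; the file_info content is invented.
    file_info = {"file_data": "@https://example.com/report.pdf", "filename": "report.pdf"}
    file_data = file_info.get("file_data", "")

    file_url_flag = bool(file_info)
    url_str = file_data[1:] if file_data.startswith("@") else file_data
    if url_str.startswith(("http://", "https://")):
        file_url_flag = True

    print(file_url_flag)  # True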

src/memos/mem_reader/read_multi_modal/image_parser.py

Lines changed: 19 additions & 6 deletions
@@ -51,15 +51,23 @@ def create_source(
         if isinstance(image_url, dict):
             url = image_url.get("url", "")
             detail = image_url.get("detail", "auto")
+            image_info = image_url
+            return SourceMessage(
+                type="image",
+                content=url,
+                url=url,
+                detail=detail,
+                image_info=image_info,
+            )
         else:
             url = str(image_url)
             detail = "auto"
-        return SourceMessage(
-            type="image",
-            content=url,
-            url=url,
-            detail=detail,
-        )
+            return SourceMessage(
+                type="image",
+                content=url,
+                url=url,
+                detail=detail,
+            )
         return SourceMessage(type="image", content=str(message))

     def rebuild_from_source(

@@ -74,11 +82,16 @@ def rebuild_from_source(
             or (source.content or "").replace("[image_url]: ", "")
         )
         detail = getattr(source, "detail", "auto")
+        image_id = ""
+        image_info = source.image_info
+        if image_info and isinstance(image_info, dict):
+            image_id = image_info.get("image_id")
         return {
            "type": "image_url",
            "image_url": {
                "url": url,
                "detail": detail,
+               "image_id": str(image_id),
            },
        }

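
With image_info carried on the SourceMessage, rebuild_from_source can round-trip an image_id into the rebuilt image_url part. A rough sketch of the same lookup for a hypothetical source (whether upstream messages actually include an image_id key is an assumption; note str() is applied unconditionally, so a dict without that key would yield the string "None"):

    # Hypothetical image_info as stored by create_source above.
    image_info = {"url": "https://example.com/cat.png", "detail": "auto", "image_id": "img_001"}

    image_id = ""
    if image_info and isinstance(image_info, dict):
        image_id = image_info.get("image_id")

    rebuilt = {
        "type": "image_url",
        "image_url": {
            "url": image_info["url"],
            "detail": image_info.get("detail", "auto"),
            "image_id": str(image_id),
        },
    }
    print(rebuilt["image_url"]["image_id"])  # img_001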

src/memos/mem_reader/read_multi_modal/multi_modal_parser.py

Lines changed: 6 additions & 1 deletion
@@ -4,6 +4,8 @@
 in both fast and fine modes.
 """

+import traceback
+
 from typing import Any

 from memos.embedders.base import BaseEmbedder

@@ -248,7 +250,10 @@ def process_transfer(
         try:
             message = parser.rebuild_from_source(source)
         except Exception as e:
-            logger.error(f"[MultiModalParser] Error rebuilding message from source: {e}")
+            logger.error(
+                f"[MultiModalParser] Error rebuilding message from "
+                f"source: {e} {traceback.format_exc()}"
+            )
             return []

         # Parse in fine mode (pass context_items and custom_tags to parse_fine)

src/memos/mem_reader/read_multi_modal/user_parser.py

Lines changed: 1 addition & 0 deletions
@@ -114,6 +114,7 @@ def create_source(
                 chat_time=chat_time,
                 message_id=message_id,
                 image_path=image_info.get("url"),
+                image_info=image_info,
             )
             source.lang = overall_lang
             sources.append(source)

src/memos/mem_reader/read_multi_modal/utils.py

Lines changed: 19 additions & 3 deletions
@@ -45,6 +45,10 @@
 )


+KEYS_DROP_LABEL = r"(text|type|image_url|imageurl|url|file|file_id|image_id|file_data)"
+ID_KEYS_DROP_VALUE = r"(file_id|image_id)"
+
+
 def parse_json_result(response_text: str) -> dict:
     """
     Parse JSON result from LLM response.

@@ -356,13 +360,25 @@ def detect_lang(text):
     cleaned_text = re.sub(r"\[[\d\-:\s]+\]", "", cleaned_text)
     # remove URLs to prevent the dilution of Chinese characters
     cleaned_text = re.sub(r'https?://[^\s<>"{}|\\^`\[\]]+', "", cleaned_text)
-    # remove MessageType schema keywords (multimodal JSON noise)
+    # remove common id-like tokens (uuid-ish / file_id / image_id /
+    # my_id_01 etc.)
+    # uuid
+    cleaned_text = re.sub(
+        r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b",
+        " ",
+        cleaned_text,
+        flags=re.IGNORECASE,
+    )
+    # key:value where key ends with _id or is id, and value is quoted or bare token
+    cleaned_text = re.sub(
+        r'(?i)\b[a-z_]*id\b\s*[:=]\s*(".*?"|\'.*?\'|[a-z0-9_\-]+)', " ", cleaned_text
+    )
     cleaned_text = re.sub(
-        r"\b(text|type|image_url|imageurl|url)\b", "", cleaned_text, flags=re.IGNORECASE
+        r'(?i)\b[a-z_]*_id\b\s*[:=]\s*(".*?"|\'.*?\'|[a-z0-9_\-]+)', " ", cleaned_text
     )
     # remove schema keywords like text / type / image_url / url
     cleaned_text = re.sub(
-        r"\b(text|type|image_url|imageurl|url|file|file_id)\b",
+        r"\b(text|type|image_url|imageurl|url|file|file_id|image_id|file_data)\b",
         "",
         cleaned_text,
         flags=re.IGNORECASE,
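
The extra passes keep id-heavy multimodal JSON from diluting language detection: UUIDs, id-style key/value pairs, and schema keywords are blanked before the character statistics run. A standalone sketch applying the same substitutions, with the regexes copied from the hunk above and an invented sample string:

    import re

    text = 'type: image_url, image_id: "9f1c2a3b-4d5e-6f70-8a9b-0c1d2e3f4a5b", 这是一张猫的照片'

    # 1) strip bare UUIDs
    cleaned = re.sub(
        r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b",
        " ", text, flags=re.IGNORECASE,
    )
    # 2) strip key:value pairs whose key ends in "id"
    cleaned = re.sub(r'(?i)\b[a-z_]*id\b\s*[:=]\s*(".*?"|\'.*?\'|[a-z0-9_\-]+)', " ", cleaned)
    # 3) strip schema keywords
    cleaned = re.sub(
        r"\b(text|type|image_url|imageurl|url|file|file_id|image_id|file_data)\b",
        "", cleaned, flags=re.IGNORECASE,
    )
    print(cleaned)  # only punctuation and the Chinese sentence survive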

src/memos/memories/textual/item.py

Lines changed: 1 addition & 0 deletions
@@ -42,6 +42,7 @@ class SourceMessage(BaseModel):
     content: str | None = None
     doc_path: str | None = None
     file_info: dict | None = None
+    image_info: dict | None = None
     model_config = ConfigDict(extra="allow")
