Skip to content

Commit d0cea34

Browse files
author
yuan.wang
committed
merge dev
2 parents 87091d6 + 5373b14 commit d0cea34

File tree

27 files changed

+1942
-894
lines changed

27 files changed

+1942
-894
lines changed

examples/mem_reader/multimodal_struct_reader.py

Lines changed: 764 additions & 688 deletions
Large diffs are not rendered by default.

src/memos/api/config.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -426,7 +426,7 @@ def get_embedder_config() -> dict[str, Any]:
426426
def get_reader_config() -> dict[str, Any]:
427427
"""Get reader configuration."""
428428
return {
429-
"backend": os.getenv("MEM_READER_BACKEND", "simple_struct"),
429+
"backend": os.getenv("MEM_READER_BACKEND", "multimodal_struct"),
430430
"config": {
431431
"chunk_type": os.getenv("MEM_READER_CHAT_CHUNK_TYPE", "default"),
432432
"chunk_length": int(os.getenv("MEM_READER_CHAT_CHUNK_TOKEN_SIZE", 1600)),

src/memos/api/handlers/component_init.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
from memos.memories.textual.simple_preference import SimplePreferenceTextMemory
4242
from memos.memories.textual.simple_tree import SimpleTreeTextMemory
4343
from memos.memories.textual.tree_text_memory.organize.manager import MemoryManager
44+
from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import FastTokenizer
4445

4546

4647
if TYPE_CHECKING:
@@ -196,6 +197,7 @@ def init_server() -> dict[str, Any]:
196197

197198
logger.debug("Memory manager initialized")
198199

200+
tokenizer = FastTokenizer()
199201
# Initialize text memory
200202
text_mem = SimpleTreeTextMemory(
201203
llm=llm,
@@ -206,6 +208,7 @@ def init_server() -> dict[str, Any]:
206208
memory_manager=memory_manager,
207209
config=default_cube_config.text_mem.config,
208210
internet_retriever=internet_retriever,
211+
tokenizer=tokenizer,
209212
)
210213

211214
logger.debug("Text memory initialized")

src/memos/api/product_models.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,12 @@ class APISearchRequest(BaseRequest):
388388
description="(Internal) Operation definitions for multi-cube read permissions.",
389389
)
390390

391+
# ==== Source for plugin ====
392+
source: str | None = Field(
393+
None,
394+
description="Source of the search query [plugin will router diff search]",
395+
)
396+
391397
@model_validator(mode="after")
392398
def _convert_deprecated_fields(self) -> "APISearchRequest":
393399
"""
@@ -469,7 +475,7 @@ class APIADDRequest(BaseRequest):
469475
),
470476
)
471477

472-
info: dict[str, str] | None = Field(
478+
info: dict[str, Any] | None = Field(
473479
None,
474480
description=(
475481
"Additional metadata for the add request. "

src/memos/graph_dbs/neo4j.py

Lines changed: 77 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1441,17 +1441,24 @@ def build_filter_condition(condition_dict: dict, param_counter: list) -> tuple[s
14411441
f"{node_alias}.{key} {cypher_op} ${param_name}"
14421442
)
14431443
elif op == "contains":
1444-
# Handle contains operator (for array fields like tags, sources)
1445-
param_name = f"filter_{key}_{op}_{param_counter[0]}"
1446-
param_counter[0] += 1
1447-
params[param_name] = op_value
1448-
1449-
# For array fields, check if element is in array
1450-
if key in ("tags", "sources"):
1451-
condition_parts.append(f"${param_name} IN {node_alias}.{key}")
1452-
else:
1453-
# For non-array fields, contains might not be applicable, but we'll treat it as IN for consistency
1454-
condition_parts.append(f"${param_name} IN {node_alias}.{key}")
1444+
# Handle contains operator (for array fields)
1445+
# Only supports array format: {"field": {"contains": ["value1", "value2"]}}
1446+
# Single string values are not supported, use array format instead: {"field": {"contains": ["value"]}}
1447+
if not isinstance(op_value, list):
1448+
raise ValueError(
1449+
f"contains operator only supports array format. "
1450+
f"Use {{'{key}': {{'contains': ['{op_value}']}}}} instead of {{'{key}': {{'contains': '{op_value}'}}}}"
1451+
)
1452+
# Handle array of values: generate AND conditions for each value (all must be present)
1453+
and_conditions = []
1454+
for item in op_value:
1455+
param_name = f"filter_{key}_{op}_{param_counter[0]}"
1456+
param_counter[0] += 1
1457+
params[param_name] = item
1458+
# For array fields, check if element is in array
1459+
and_conditions.append(f"${param_name} IN {node_alias}.{key}")
1460+
if and_conditions:
1461+
condition_parts.append(f"({' AND '.join(and_conditions)})")
14551462
elif op == "like":
14561463
# Handle like operator (for fuzzy matching, similar to SQL LIKE '%value%')
14571464
# Neo4j uses CONTAINS for string matching
@@ -1523,6 +1530,7 @@ def _parse_node(self, node_data: dict[str, Any]) -> dict[str, Any]:
15231530

15241531
def delete_node_by_prams(
15251532
self,
1533+
writable_cube_ids: list[str],
15261534
memory_ids: list[str] | None = None,
15271535
file_ids: list[str] | None = None,
15281536
filter: dict | None = None,
@@ -1531,56 +1539,90 @@ def delete_node_by_prams(
15311539
Delete nodes by memory_ids, file_ids, or filter.
15321540
15331541
Args:
1542+
writable_cube_ids (list[str]): List of cube IDs (user_name) to filter nodes. Required parameter.
15341543
memory_ids (list[str], optional): List of memory node IDs to delete.
15351544
file_ids (list[str], optional): List of file node IDs to delete.
15361545
filter (dict, optional): Filter dictionary to query matching nodes for deletion.
15371546
15381547
Returns:
15391548
int: Number of nodes deleted.
15401549
"""
1541-
# Collect all node IDs to delete
1542-
ids_to_delete = set()
1550+
logger.info(
1551+
f"[delete_node_by_prams] memory_ids: {memory_ids}, file_ids: {file_ids}, filter: {filter}, writable_cube_ids: {writable_cube_ids}"
1552+
)
1553+
print(
1554+
f"[delete_node_by_prams] memory_ids: {memory_ids}, file_ids: {file_ids}, filter: {filter}, writable_cube_ids: {writable_cube_ids}"
1555+
)
15431556

1544-
# Add memory_ids if provided
1557+
# Validate writable_cube_ids
1558+
if not writable_cube_ids or len(writable_cube_ids) == 0:
1559+
raise ValueError("writable_cube_ids is required and cannot be empty")
1560+
1561+
# Build WHERE conditions separately for memory_ids and file_ids
1562+
where_clauses = []
1563+
params = {}
1564+
1565+
# Build user_name condition from writable_cube_ids (OR relationship - match any cube_id)
1566+
user_name_conditions = []
1567+
for idx, cube_id in enumerate(writable_cube_ids):
1568+
param_name = f"cube_id_{idx}"
1569+
user_name_conditions.append(f"n.user_name = ${param_name}")
1570+
params[param_name] = cube_id
1571+
1572+
# Handle memory_ids: query n.id
15451573
if memory_ids and len(memory_ids) > 0:
1546-
ids_to_delete.update(memory_ids)
1574+
where_clauses.append("n.id IN $memory_ids")
1575+
params["memory_ids"] = memory_ids
15471576

1548-
# Add file_ids if provided (treating them as node IDs)
1577+
# Handle file_ids: query n.file_ids field
1578+
# All file_ids must be present in the array field (AND relationship)
15491579
if file_ids and len(file_ids) > 0:
1550-
ids_to_delete.update(file_ids)
1580+
file_id_and_conditions = []
1581+
for idx, file_id in enumerate(file_ids):
1582+
param_name = f"file_id_{idx}"
1583+
params[param_name] = file_id
1584+
# Check if this file_id is in the file_ids array field
1585+
file_id_and_conditions.append(f"${param_name} IN n.file_ids")
1586+
if file_id_and_conditions:
1587+
# Use AND to require all file_ids to be present
1588+
where_clauses.append(f"({' AND '.join(file_id_and_conditions)})")
15511589

15521590
# Query nodes by filter if provided
1591+
filter_ids = []
15531592
if filter:
15541593
# Use get_by_metadata with empty filters list and filter
15551594
filter_ids = self.get_by_metadata(
15561595
filters=[],
15571596
user_name=None,
15581597
filter=filter,
1559-
knowledgebase_ids=None,
1560-
user_name_flag=False,
1598+
knowledgebase_ids=writable_cube_ids,
15611599
)
1562-
ids_to_delete.update(filter_ids)
15631600

1564-
# If no IDs to delete, return 0
1565-
if not ids_to_delete:
1566-
logger.warning("[delete_node_by_prams] No nodes to delete")
1601+
# If filter returned IDs, add condition for them
1602+
if filter_ids:
1603+
where_clauses.append("n.id IN $filter_ids")
1604+
params["filter_ids"] = filter_ids
1605+
1606+
# If no conditions (except user_name), return 0
1607+
if not where_clauses:
1608+
logger.warning(
1609+
"[delete_node_by_prams] No nodes to delete (no memory_ids, file_ids, or filter provided)"
1610+
)
15671611
return 0
15681612

1569-
# Convert to list for easier handling
1570-
ids_list = list(ids_to_delete)
1571-
logger.info(f"[delete_node_by_prams] Deleting {len(ids_list)} nodes: {ids_list}")
1613+
# Build WHERE clause
1614+
# First, combine memory_ids, file_ids, and filter conditions with OR (any condition can match)
1615+
data_conditions = " OR ".join([f"({clause})" for clause in where_clauses])
15721616

1573-
# Build WHERE condition for collected IDs (query n.id)
1574-
ids_where = "n.id IN $ids_to_delete"
1575-
params = {"ids_to_delete": ids_list}
1617+
# Then, combine with user_name condition using AND (must match user_name AND one of the data conditions)
1618+
user_name_where = " OR ".join(user_name_conditions)
1619+
ids_where = f"({user_name_where}) AND ({data_conditions})"
15761620

1577-
# Calculate total count for logging
1578-
total_count = len(ids_list)
15791621
logger.info(
15801622
f"[delete_node_by_prams] Deleting nodes - memory_ids: {memory_ids}, file_ids: {file_ids}, filter: {filter}"
15811623
)
15821624
print(
1583-
f"[delete_node_by_prams] Deleting {total_count} nodes - memory_ids: {memory_ids}, file_ids: {file_ids}, filter: {filter}"
1625+
f"[delete_node_by_prams] Deleting nodes - memory_ids: {memory_ids}, file_ids: {file_ids}, filter: {filter}"
15841626
)
15851627

15861628
# First count matching nodes to get accurate count
@@ -1592,16 +1634,17 @@ def delete_node_by_prams(
15921634
delete_query = f"MATCH (n:Memory) WHERE {ids_where} DETACH DELETE n"
15931635
logger.info(f"[delete_node_by_prams] delete_query: {delete_query}")
15941636
print(f"[delete_node_by_prams] delete_query: {delete_query}")
1637+
print(f"[delete_node_by_prams] params: {params}")
15951638

15961639
deleted_count = 0
15971640
try:
15981641
with self.driver.session(database=self.db_name) as session:
15991642
# Count nodes before deletion
16001643
count_result = session.run(count_query, **params)
16011644
count_record = count_result.single()
1602-
expected_count = total_count
1645+
expected_count = 0
16031646
if count_record:
1604-
expected_count = count_record["node_count"] or total_count
1647+
expected_count = count_record["node_count"] or 0
16051648

16061649
# Delete nodes
16071650
session.run(delete_query, **params)

0 commit comments

Comments
 (0)