Skip to content

Commit 2048c8f

Browse files
authored
Dev 20260224 v2.0.7 (MemTensor#1127)
## Description Please include a summary of the change, the problem it solves, the implementation approach, and relevant context. List any dependencies required for this change. Related Issue (Required): Fixes @issue_number ## Type of change Please delete options that are not relevant. - [ ] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] Refactor (does not change functionality, e.g. code style improvements, linting) - [ ] Documentation update ## How Has This Been Tested? Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration - [ ] Unit Test - [ ] Test Script Or Test Steps (please provide) - [ ] Pipeline Automated API Test (please provide) ## Checklist - [ ] I have performed a self-review of my own code | 我已自行检查了自己的代码 - [ ] I have commented my code in hard-to-understand areas | 我已在难以理解的地方对代码进行了注释 - [ ] I have added tests that prove my fix is effective or that my feature works | 我已添加测试以证明我的修复有效或功能正常 - [ ] I have created related documentation issue/PR in [MemOS-Docs](https://github.com/MemTensor/MemOS-Docs) (if applicable) | 我已在 [MemOS-Docs](https://github.com/MemTensor/MemOS-Docs) 中创建了相关的文档 issue/PR(如果适用) - [ ] I have linked the issue to this PR (if applicable) | 我已将 issue 链接到此 PR(如果适用) - [ ] I have mentioned the person who will review this PR | 我已提及将审查此 PR 的人 ## Reviewer Checklist - [ ] closes #xxxx (Replace xxxx with the GitHub issue number) - [ ] Made sure Checks passed - [ ] Tests have been provided
2 parents 7747de1 + b73affa commit 2048c8f

File tree

29 files changed

+1036
-532
lines changed

29 files changed

+1036
-532
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -345,10 +345,10 @@ url = {https://global-sci.com/article/91443/memory3-language-modeling-with-expli
345345
346346
## 🙌 Contributing
347347
348-
We welcome contributions from the community! Please read our [contribution guidelines](https://memos-docs.openmem.net/contribution/overview) to get started.
348+
We welcome contributions from the community! Please read our [contribution guidelines](https://memos-docs.openmem.net/open_source/contribution/overview/) to get started.
349349
350350
<br>
351351
352352
## 📄 License
353353
354-
MemOS is licensed under the [Apache 2.0 License](./LICENSE).
354+
MemOS is licensed under the [Apache 2.0 License](./LICENSE).

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
##############################################################################
55

66
name = "MemoryOS"
7-
version = "2.0.6"
7+
version = "2.0.7"
88
description = "Intelligence Begins with Memory"
99
license = {text = "Apache-2.0"}
1010
readme = "README.md"

src/memos/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "2.0.6"
1+
__version__ = "2.0.7"
22

33
from memos.configs.mem_cube import GeneralMemCubeConfig
44
from memos.configs.mem_os import MOSConfig

src/memos/api/handlers/search_handler.py

Lines changed: 57 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse
6464

6565
# Expand top_k for deduplication (3x to ensure enough candidates)
6666
if search_req_local.dedup in ("sim", "mmr"):
67-
search_req_local.top_k = search_req_local.top_k * 5
67+
search_req_local.top_k = search_req_local.top_k * 3
6868

6969
# Search and deduplicate
7070
cube_view = self._build_cube_view(search_req_local)
@@ -152,9 +152,6 @@ def _dedup_text_memories(self, results: dict[str, Any], target_top_k: int) -> di
152152
return results
153153

154154
embeddings = self._extract_embeddings([mem for _, mem, _ in flat])
155-
if embeddings is None:
156-
documents = [mem.get("memory", "") for _, mem, _ in flat]
157-
embeddings = self.searcher.embedder.embed(documents)
158155

159156
similarity_matrix = cosine_similarity_matrix(embeddings)
160157

@@ -235,12 +232,39 @@ def _mmr_dedup_text_memories(
235232
if len(flat) <= 1:
236233
return results
237234

235+
total_by_type: dict[str, int] = {"text": 0, "preference": 0}
236+
existing_by_type: dict[str, int] = {"text": 0, "preference": 0}
237+
missing_by_type: dict[str, int] = {"text": 0, "preference": 0}
238+
missing_indices: list[int] = []
239+
for idx, (mem_type, _, mem, _) in enumerate(flat):
240+
if mem_type not in total_by_type:
241+
total_by_type[mem_type] = 0
242+
existing_by_type[mem_type] = 0
243+
missing_by_type[mem_type] = 0
244+
total_by_type[mem_type] += 1
245+
246+
embedding = mem.get("metadata", {}).get("embedding")
247+
if embedding:
248+
existing_by_type[mem_type] += 1
249+
else:
250+
missing_by_type[mem_type] += 1
251+
missing_indices.append(idx)
252+
253+
self.logger.info(
254+
"[SearchHandler] MMR embedding metadata scan: total=%s total_by_type=%s existing_by_type=%s missing_by_type=%s",
255+
len(flat),
256+
total_by_type,
257+
existing_by_type,
258+
missing_by_type,
259+
)
260+
if missing_indices:
261+
self.logger.warning(
262+
"[SearchHandler] MMR embedding metadata missing; will compute missing embeddings: missing_total=%s",
263+
len(missing_indices),
264+
)
265+
238266
# Get or compute embeddings
239267
embeddings = self._extract_embeddings([mem for _, _, mem, _ in flat])
240-
if embeddings is None:
241-
self.logger.warning("[SearchHandler] Embedding is missing; recomputing embeddings")
242-
documents = [mem.get("memory", "") for _, _, mem, _ in flat]
243-
embeddings = self.searcher.embedder.embed(documents)
244268

245269
# Compute similarity matrix using NumPy-optimized method
246270
# Returns numpy array but compatible with list[i][j] indexing
@@ -404,14 +428,32 @@ def _max_similarity(
404428
return 0.0
405429
return max(similarity_matrix[index][j] for j in selected_indices)
406430

407-
@staticmethod
408-
def _extract_embeddings(memories: list[dict[str, Any]]) -> list[list[float]] | None:
431+
def _extract_embeddings(self, memories: list[dict[str, Any]]) -> list[list[float]]:
409432
embeddings: list[list[float]] = []
410-
for mem in memories:
411-
embedding = mem.get("metadata", {}).get("embedding")
412-
if not embedding:
413-
return None
414-
embeddings.append(embedding)
433+
missing_indices: list[int] = []
434+
missing_documents: list[str] = []
435+
436+
for idx, mem in enumerate(memories):
437+
metadata = mem.get("metadata")
438+
if not isinstance(metadata, dict):
439+
metadata = {}
440+
mem["metadata"] = metadata
441+
442+
embedding = metadata.get("embedding")
443+
if embedding:
444+
embeddings.append(embedding)
445+
continue
446+
447+
embeddings.append([])
448+
missing_indices.append(idx)
449+
missing_documents.append(mem.get("memory", ""))
450+
451+
if missing_indices:
452+
computed = self.searcher.embedder.embed(missing_documents)
453+
for idx, embedding in zip(missing_indices, computed, strict=False):
454+
embeddings[idx] = embedding
455+
memories[idx]["metadata"]["embedding"] = embedding
456+
415457
return embeddings
416458

417459
@staticmethod
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
"""Krolik middleware extensions for MemOS."""
22

3-
from .auth import verify_api_key, require_scope, require_admin, require_read, require_write
3+
from .auth import require_admin, require_read, require_scope, require_write, verify_api_key
44
from .rate_limit import RateLimitMiddleware
55

6+
67
__all__ = [
7-
"verify_api_key",
8-
"require_scope",
8+
"RateLimitMiddleware",
99
"require_admin",
1010
"require_read",
11+
"require_scope",
1112
"require_write",
12-
"RateLimitMiddleware",
13+
"verify_api_key",
1314
]

src/memos/api/product_models.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,12 @@ class ChatRequest(BaseRequest):
9999
manager_user_id: str | None = Field(None, description="Manager User ID")
100100
project_id: str | None = Field(None, description="Project ID")
101101
relativity: float = Field(
102-
0.0,
102+
0.45,
103103
ge=0,
104104
description=(
105105
"Relevance threshold for recalled memories. "
106106
"Only memories with metadata.relativity >= relativity will be returned. "
107-
"Use 0 to disable threshold filtering. Default: 0.3."
107+
"Use 0 to disable threshold filtering. Default: 0.45."
108108
),
109109
)
110110

@@ -339,12 +339,12 @@ class APISearchRequest(BaseRequest):
339339
)
340340

341341
relativity: float = Field(
342-
0.0,
342+
0.45,
343343
ge=0,
344344
description=(
345345
"Relevance threshold for recalled memories. "
346346
"Only memories with metadata.relativity >= relativity will be returned. "
347-
"Use 0 to disable threshold filtering. Default: 0.3."
347+
"Use 0 to disable threshold filtering. Default: 0.45."
348348
),
349349
)
350350

@@ -785,12 +785,12 @@ class APIChatCompleteRequest(BaseRequest):
785785
manager_user_id: str | None = Field(None, description="Manager User ID")
786786
project_id: str | None = Field(None, description="Project ID")
787787
relativity: float = Field(
788-
0.0,
788+
0.45,
789789
ge=0,
790790
description=(
791791
"Relevance threshold for recalled memories. "
792792
"Only memories with metadata.relativity >= relativity will be returned. "
793-
"Use 0 to disable threshold filtering. Default: 0.3."
793+
"Use 0 to disable threshold filtering. Default: 0.45."
794794
),
795795
)
796796

src/memos/api/utils/api_keys.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
"""
66

77
import hashlib
8-
import os
98
import secrets
9+
1010
from dataclasses import dataclass
1111
from datetime import datetime, timedelta
1212

src/memos/embedders/universal_api.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ async def _create_embeddings():
7373
)
7474
)
7575
logger.info(f"Embeddings request succeeded with {time.time() - init_time} seconds")
76-
logger.info(f"Embeddings request response: {response}")
7776
return [r.embedding for r in response.data]
7877
except Exception as e:
7978
if self.use_backup_client:

src/memos/graph_dbs/base.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,35 @@
1+
import re
2+
13
from abc import ABC, abstractmethod
24
from typing import Any, Literal
35

46

7+
# Pattern for valid field names: alphanumeric and underscores, must start with letter or underscore
8+
_VALID_FIELD_NAME_RE = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")
9+
10+
511
class BaseGraphDB(ABC):
612
"""
713
Abstract base class for a graph database interface used in a memory-augmented RAG system.
814
"""
915

16+
@staticmethod
17+
def _validate_return_fields(return_fields: list[str] | None) -> list[str]:
18+
"""Validate and sanitize return_fields to prevent query injection.
19+
20+
Only allows alphanumeric characters and underscores in field names.
21+
Silently drops invalid field names.
22+
23+
Args:
24+
return_fields: List of field names to validate.
25+
26+
Returns:
27+
List of valid field names.
28+
"""
29+
if not return_fields:
30+
return []
31+
return [f for f in return_fields if _VALID_FIELD_NAME_RE.match(f)]
32+
1033
# Node (Memory) Management
1134
@abstractmethod
1235
def add_node(self, id: str, memory: str, metadata: dict[str, Any]) -> None:
@@ -144,16 +167,23 @@ def get_context_chain(self, id: str, type: str = "FOLLOWS") -> list[str]:
144167

145168
# Search / recall operations
146169
@abstractmethod
147-
def search_by_embedding(self, vector: list[float], top_k: int = 5, **kwargs) -> list[dict]:
170+
def search_by_embedding(
171+
self, vector: list[float], top_k: int = 5, return_fields: list[str] | None = None, **kwargs
172+
) -> list[dict]:
148173
"""
149174
Retrieve node IDs based on vector similarity.
150175
151176
Args:
152177
vector (list[float]): The embedding vector representing query semantics.
153178
top_k (int): Number of top similar nodes to retrieve.
179+
return_fields (list[str], optional): Additional node fields to include in results
180+
(e.g., ["memory", "status", "tags"]). When provided, each result dict will
181+
contain these fields in addition to 'id' and 'score'.
182+
Defaults to None (only 'id' and 'score' are returned).
154183
155184
Returns:
156185
list[dict]: A list of dicts with 'id' and 'score', ordered by similarity.
186+
If return_fields is specified, each dict also includes the requested fields.
157187
158188
Notes:
159189
- This method may internally call a VecDB (e.g., Qdrant) or store embeddings in the graph DB itself.

src/memos/graph_dbs/neo4j.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -818,6 +818,7 @@ def search_by_embedding(
818818
user_name: str | None = None,
819819
filter: dict | None = None,
820820
knowledgebase_ids: list[str] | None = None,
821+
return_fields: list[str] | None = None,
821822
**kwargs,
822823
) -> list[dict]:
823824
"""
@@ -832,9 +833,14 @@ def search_by_embedding(
832833
threshold (float, optional): Minimum similarity score threshold (0 ~ 1).
833834
search_filter (dict, optional): Additional metadata filters for search results.
834835
Keys should match node properties, values are the expected values.
836+
return_fields (list[str], optional): Additional node fields to include in results
837+
(e.g., ["memory", "status", "tags"]). When provided, each result
838+
dict will contain these fields in addition to 'id' and 'score'.
839+
Defaults to None (only 'id' and 'score' are returned).
835840
836841
Returns:
837842
list[dict]: A list of dicts with 'id' and 'score', ordered by similarity.
843+
If return_fields is specified, each dict also includes the requested fields.
838844
839845
Notes:
840846
- This method uses Neo4j native vector indexing to search for similar nodes.
@@ -886,11 +892,20 @@ def search_by_embedding(
886892
if where_clauses:
887893
where_clause = "WHERE " + " AND ".join(where_clauses)
888894

895+
return_clause = "RETURN node.id AS id, score"
896+
if return_fields:
897+
validated_fields = self._validate_return_fields(return_fields)
898+
extra_fields = ", ".join(
899+
f"node.{field} AS {field}" for field in validated_fields if field != "id"
900+
)
901+
if extra_fields:
902+
return_clause = f"RETURN node.id AS id, score, {extra_fields}"
903+
889904
query = f"""
890905
CALL db.index.vector.queryNodes('memory_vector_index', $k, $embedding)
891906
YIELD node, score
892907
{where_clause}
893-
RETURN node.id AS id, score
908+
{return_clause}
894909
"""
895910

896911
parameters = {"embedding": vector, "k": top_k}
@@ -920,7 +935,15 @@ def search_by_embedding(
920935
print(f"[search_by_embedding] query: {query},parameters: {parameters}")
921936
with self.driver.session(database=self.db_name) as session:
922937
result = session.run(query, parameters)
923-
records = [{"id": record["id"], "score": record["score"]} for record in result]
938+
records = []
939+
for record in result:
940+
item = {"id": record["id"], "score": record["score"]}
941+
if return_fields:
942+
record_keys = record.keys()
943+
for field in return_fields:
944+
if field != "id" and field in record_keys:
945+
item[field] = record[field]
946+
records.append(item)
924947

925948
# Threshold filtering after retrieval
926949
if threshold is not None:
@@ -943,8 +966,8 @@ def search_by_fulltext(
943966
**kwargs,
944967
) -> list[dict]:
945968
"""
946-
TODO: 实现 Neo4j 的关键词检索, 以兼容 TreeTextMemory 的 keyword/fulltext 召回路径.
947-
目前先返回空列表, 避免切换到 Neo4j 后因缺失方法导致运行时报错.
969+
TODO: Implement fulltext search for Neo4j to be compatible with TreeTextMemory's keyword/fulltext recall path.
970+
Currently, return an empty list to avoid runtime errors due to missing methods when switching to Neo4j.
948971
"""
949972
return []
950973

0 commit comments

Comments
 (0)