Wang-Daoji
diff --git a/‎README.md‎
Lines changed: 2 additions & 2 deletions b/‎README.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/memos/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎src/memos/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/memos/api/handlers/search_handler.py‎
Lines changed: 57 additions & 15 deletions b/‎src/memos/api/handlers/search_handler.py‎
Lines changed: 57 additions & 15 deletions
diff --git a/‎src/memos/api/middleware/__init__.py‎
Lines changed: 5 additions & 4 deletions b/‎src/memos/api/middleware/__init__.py‎
Lines changed: 5 additions & 4 deletions
diff --git a/‎src/memos/api/product_models.py‎
Lines changed: 6 additions & 6 deletions b/‎src/memos/api/product_models.py‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎src/memos/api/utils/api_keys.py‎
Lines changed: 1 addition & 1 deletion b/‎src/memos/api/utils/api_keys.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/memos/embedders/universal_api.py‎
Lines changed: 0 additions & 1 deletion b/‎src/memos/embedders/universal_api.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎src/memos/graph_dbs/base.py‎
Lines changed: 31 additions & 1 deletion b/‎src/memos/graph_dbs/base.py‎
Lines changed: 31 additions & 1 deletion
diff --git a/‎src/memos/graph_dbs/neo4j.py‎
Lines changed: 27 additions & 4 deletions b/‎src/memos/graph_dbs/neo4j.py‎
Lines changed: 27 additions & 4 deletions
@@ -345,10 +345,10 @@ url = {https://global-sci.com/article/91443/memory3-language-modeling-with-expli
 
 ## 🙌 Contributing
 
-We welcome contributions from the community! Please read our [contribution guidelines](https://memos-docs.openmem.net/contribution/overview) to get started.
+We welcome contributions from the community! Please read our [contribution guidelines](https://memos-docs.openmem.net/open_source/contribution/overview/) to get started.
 
 <br>
 
 ## 📄 License
 
-MemOS is licensed under the [Apache 2.0 License](./LICENSE).
+MemOS is licensed under the [Apache 2.0 License](./LICENSE).
@@ -4,7 +4,7 @@
 ##############################################################################
 
 name = "MemoryOS"
-version = "2.0.6"
+version = "2.0.7"
 description = "Intelligence Begins with Memory"
 license = {text = "Apache-2.0"}
 readme = "README.md"
 
@@ -1,4 +1,4 @@
-__version__ = "2.0.6"
+__version__ = "2.0.7"
 
 from memos.configs.mem_cube import GeneralMemCubeConfig
 from memos.configs.mem_os import MOSConfig
 
@@ -64,7 +64,7 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse
 
-        # Expand top_k for deduplication (5x to ensure enough candidates)
+        # Expand top_k for deduplication (3x to ensure enough candidates)
         if search_req_local.dedup in ("sim", "mmr"):
-            search_req_local.top_k = search_req_local.top_k * 5
+            search_req_local.top_k = search_req_local.top_k * 3
 
         # Search and deduplicate
         cube_view = self._build_cube_view(search_req_local)
@@ -152,9 +152,6 @@ def _dedup_text_memories(self, results: dict[str, Any], target_top_k: int) -> di
             return results
 
         embeddings = self._extract_embeddings([mem for _, mem, _ in flat])
-        if embeddings is None:
-            documents = [mem.get("memory", "") for _, mem, _ in flat]
-            embeddings = self.searcher.embedder.embed(documents)
 
         similarity_matrix = cosine_similarity_matrix(embeddings)
 
@@ -235,12 +232,39 @@ def _mmr_dedup_text_memories(
         if len(flat) <= 1:
             return results
 
+        total_by_type: dict[str, int] = {"text": 0, "preference": 0}
+        existing_by_type: dict[str, int] = {"text": 0, "preference": 0}
+        missing_by_type: dict[str, int] = {"text": 0, "preference": 0}
+        missing_indices: list[int] = []
+        for idx, (mem_type, _, mem, _) in enumerate(flat):
+            if mem_type not in total_by_type:
+                total_by_type[mem_type] = 0
+                existing_by_type[mem_type] = 0
+                missing_by_type[mem_type] = 0
+            total_by_type[mem_type] += 1
+
+            embedding = mem.get("metadata", {}).get("embedding")
+            if embedding:
+                existing_by_type[mem_type] += 1
+            else:
+                missing_by_type[mem_type] += 1
+                missing_indices.append(idx)
+
+        self.logger.info(
+            "[SearchHandler] MMR embedding metadata scan: total=%s total_by_type=%s existing_by_type=%s missing_by_type=%s",
+            len(flat),
+            total_by_type,
+            existing_by_type,
+            missing_by_type,
+        )
+        if missing_indices:
+            self.logger.warning(
+                "[SearchHandler] MMR embedding metadata missing; will compute missing embeddings: missing_total=%s",
+                len(missing_indices),
+            )
+
         # Get or compute embeddings
         embeddings = self._extract_embeddings([mem for _, _, mem, _ in flat])
-        if embeddings is None:
-            self.logger.warning("[SearchHandler] Embedding is missing; recomputing embeddings")
-            documents = [mem.get("memory", "") for _, _, mem, _ in flat]
-            embeddings = self.searcher.embedder.embed(documents)
 
         # Compute similarity matrix using NumPy-optimized method
         # Returns numpy array but compatible with list[i][j] indexing
@@ -404,14 +428,32 @@ def _max_similarity(
             return 0.0
         return max(similarity_matrix[index][j] for j in selected_indices)
 
-    @staticmethod
-    def _extract_embeddings(memories: list[dict[str, Any]]) -> list[list[float]] | None:
+    def _extract_embeddings(self, memories: list[dict[str, Any]]) -> list[list[float]]:
         embeddings: list[list[float]] = []
-        for mem in memories:
-            embedding = mem.get("metadata", {}).get("embedding")
-            if not embedding:
-                return None
-            embeddings.append(embedding)
+        missing_indices: list[int] = []
+        missing_documents: list[str] = []
+
+        for idx, mem in enumerate(memories):
+            metadata = mem.get("metadata")
+            if not isinstance(metadata, dict):
+                metadata = {}
+                mem["metadata"] = metadata
+
+            embedding = metadata.get("embedding")
+            if embedding:
+                embeddings.append(embedding)
+                continue
+
+            embeddings.append([])
+            missing_indices.append(idx)
+            missing_documents.append(mem.get("memory", ""))
+
+        if missing_indices:
+            computed = self.searcher.embedder.embed(missing_documents)
+            for idx, embedding in zip(missing_indices, computed, strict=False):
+                embeddings[idx] = embedding
+                memories[idx]["metadata"]["embedding"] = embedding
+
         return embeddings
 
     @staticmethod
 
@@ -1,13 +1,14 @@
 """Krolik middleware extensions for MemOS."""
 
-from .auth import verify_api_key, require_scope, require_admin, require_read, require_write
+from .auth import require_admin, require_read, require_scope, require_write, verify_api_key
 from .rate_limit import RateLimitMiddleware
 
+
 __all__ = [
-    "verify_api_key",
-    "require_scope",
+    "RateLimitMiddleware",
     "require_admin",
     "require_read",
+    "require_scope",
     "require_write",
-    "RateLimitMiddleware",
+    "verify_api_key",
 ]
@@ -99,12 +99,12 @@ class ChatRequest(BaseRequest):
     manager_user_id: str | None = Field(None, description="Manager User ID")
     project_id: str | None = Field(None, description="Project ID")
     relativity: float = Field(
-        0.0,
+        0.45,
         ge=0,
         description=(
             "Relevance threshold for recalled memories. "
             "Only memories with metadata.relativity >= relativity will be returned. "
-            "Use 0 to disable threshold filtering. Default: 0.3."
+            "Use 0 to disable threshold filtering. Default: 0.45."
         ),
     )
 
@@ -339,12 +339,12 @@ class APISearchRequest(BaseRequest):
     )
 
     relativity: float = Field(
-        0.0,
+        0.45,
         ge=0,
         description=(
             "Relevance threshold for recalled memories. "
             "Only memories with metadata.relativity >= relativity will be returned. "
-            "Use 0 to disable threshold filtering. Default: 0.3."
+            "Use 0 to disable threshold filtering. Default: 0.45."
         ),
     )
 
@@ -785,12 +785,12 @@ class APIChatCompleteRequest(BaseRequest):
     manager_user_id: str | None = Field(None, description="Manager User ID")
     project_id: str | None = Field(None, description="Project ID")
     relativity: float = Field(
-        0.0,
+        0.45,
         ge=0,
         description=(
             "Relevance threshold for recalled memories. "
             "Only memories with metadata.relativity >= relativity will be returned. "
-            "Use 0 to disable threshold filtering. Default: 0.3."
+            "Use 0 to disable threshold filtering. Default: 0.45."
         ),
     )
 
 
@@ -5,8 +5,8 @@
 """
 
 import hashlib
-import os
 import secrets
+
 from dataclasses import dataclass
 from datetime import datetime, timedelta
 
 
@@ -73,7 +73,6 @@ async def _create_embeddings():
                     )
                 )
                 logger.info(f"Embeddings request succeeded with {time.time() - init_time} seconds")
-                logger.info(f"Embeddings request response: {response}")
                 return [r.embedding for r in response.data]
             except Exception as e:
                 if self.use_backup_client:
 
@@ -1,12 +1,35 @@
+import re
+
 from abc import ABC, abstractmethod
 from typing import Any, Literal
 
 
+# Pattern for valid field names: alphanumeric and underscores, must start with letter or underscore
+_VALID_FIELD_NAME_RE = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")
+
+
 class BaseGraphDB(ABC):
     """
     Abstract base class for a graph database interface used in a memory-augmented RAG system.
     """
 
+    @staticmethod
+    def _validate_return_fields(return_fields: list[str] | None) -> list[str]:
+        """Validate and sanitize return_fields to prevent query injection.
+
+        Keeps only names that fully match the identifier pattern; fullmatch is used because
+        ``$`` also matches before a trailing newline. Invalid names are silently dropped.
+
+        Args:
+            return_fields: List of field names to validate.
+
+        Returns:
+            List of valid field names.
+        """
+        if not return_fields:
+            return []
+        return [f for f in return_fields if _VALID_FIELD_NAME_RE.fullmatch(f)]
+
     # Node (Memory) Management
     @abstractmethod
     def add_node(self, id: str, memory: str, metadata: dict[str, Any]) -> None:
@@ -144,16 +167,23 @@ def get_context_chain(self, id: str, type: str = "FOLLOWS") -> list[str]:
 
     # Search / recall operations
     @abstractmethod
-    def search_by_embedding(self, vector: list[float], top_k: int = 5, **kwargs) -> list[dict]:
+    def search_by_embedding(
+        self, vector: list[float], top_k: int = 5, return_fields: list[str] | None = None, **kwargs
+    ) -> list[dict]:
         """
         Retrieve node IDs based on vector similarity.
 
         Args:
             vector (list[float]): The embedding vector representing query semantics.
             top_k (int): Number of top similar nodes to retrieve.
+            return_fields (list[str], optional): Additional node fields to include in results
+                (e.g., ["memory", "status", "tags"]). When provided, each result dict will
+                contain these fields in addition to 'id' and 'score'.
+                Defaults to None (only 'id' and 'score' are returned).
 
         Returns:
             list[dict]: A list of dicts with 'id' and 'score', ordered by similarity.
+                If return_fields is specified, each dict also includes the requested fields.
 
         Notes:
             - This method may internally call a VecDB (e.g., Qdrant) or store embeddings in the graph DB itself.
 
@@ -818,6 +818,7 @@ def search_by_embedding(
         user_name: str | None = None,
         filter: dict | None = None,
         knowledgebase_ids: list[str] | None = None,
+        return_fields: list[str] | None = None,
         **kwargs,
     ) -> list[dict]:
         """
@@ -832,9 +833,14 @@ def search_by_embedding(
             threshold (float, optional): Minimum similarity score threshold (0 ~ 1).
             search_filter (dict, optional): Additional metadata filters for search results.
                             Keys should match node properties, values are the expected values.
+            return_fields (list[str], optional): Additional node fields to include in results
+                            (e.g., ["memory", "status", "tags"]). When provided, each result
+                            dict will contain these fields in addition to 'id' and 'score'.
+                            Defaults to None (only 'id' and 'score' are returned).
 
         Returns:
             list[dict]: A list of dicts with 'id' and 'score', ordered by similarity.
+                If return_fields is specified, each dict also includes the requested fields.
 
         Notes:
             - This method uses Neo4j native vector indexing to search for similar nodes.
@@ -886,11 +892,20 @@ def search_by_embedding(
         if where_clauses:
             where_clause = "WHERE " + " AND ".join(where_clauses)
 
+        return_clause = "RETURN node.id AS id, score"
+        if return_fields:
+            validated_fields = self._validate_return_fields(return_fields)
+            extra_fields = ", ".join(
+                f"node.{field} AS {field}" for field in validated_fields if field != "id"
+            )
+            if extra_fields:
+                return_clause = f"RETURN node.id AS id, score, {extra_fields}"
+
         query = f"""
             CALL db.index.vector.queryNodes('memory_vector_index', $k, $embedding)
             YIELD node, score
             {where_clause}
-            RETURN node.id AS id, score
+            {return_clause}
         """
 
         parameters = {"embedding": vector, "k": top_k}
@@ -920,7 +935,15 @@ def search_by_embedding(
         print(f"[search_by_embedding] query: {query},parameters: {parameters}")
         with self.driver.session(database=self.db_name) as session:
             result = session.run(query, parameters)
-            records = [{"id": record["id"], "score": record["score"]} for record in result]
+            records = []
+            for record in result:
+                item = {"id": record["id"], "score": record["score"]}
+                if return_fields:
+                    record_keys = record.keys()
+                    for field in return_fields:
+                        if field != "id" and field in record_keys:
+                            item[field] = record[field]
+                records.append(item)
 
         # Threshold filtering after retrieval
         if threshold is not None:
@@ -943,8 +966,8 @@ def search_by_fulltext(
         **kwargs,
     ) -> list[dict]:
         """
-        TODO: 实现 Neo4j 的关键词检索, 以兼容 TreeTextMemory 的 keyword/fulltext 召回路径.
-        目前先返回空列表, 避免切换到 Neo4j 后因缺失方法导致运行时报错.
+        TODO: Implement fulltext search for Neo4j to be compatible with TreeTextMemory's keyword/fulltext recall path.
+        Currently, return an empty list to avoid runtime errors due to missing methods when switching to Neo4j.
         """
         return []
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-__version__ = "2.0.6"`
	`1`	`+__version__ = "2.0.7"`
`2`	`2`
`3`	`3`	`from memos.configs.mem_cube import GeneralMemCubeConfig`
`4`	`4`	`from memos.configs.mem_os import MOSConfig`
Original file line number	Diff line number	Diff line change
`@@ -73,7 +73,6 @@ async def _create_embeddings():`
`73`	`73`	`)`
`74`	`74`	`)`
`75`	`75`	`logger.info(f"Embeddings request succeeded with {time.time() - init_time} seconds")`
`76`		`- logger.info(f"Embeddings request response: {response}")`
`77`	`76`	`return [r.embedding for r in response.data]`
`78`	`77`	`except Exception as e:`
`79`	`78`	`if self.use_backup_client:`