
Commit 7f79542

feat: support nebular database
1 parent f12d361 commit 7f79542

2 files changed: +260 / -10 lines changed


src/memos/graph_dbs/nebular.py

Lines changed: 75 additions & 9 deletions
@@ -113,7 +113,7 @@ def __init__(self, config: NebulaGraphDBConfig):
             username=config.get("user_name"),
             password=config.get("password"),
         )
-        self.db_name = config.db_name
+        self.db_name = config.space
         self.space = config.get("space")
         self.user_name = config.user_name
         self.system_db_name = "system" if config.use_multi_db else config.space
@@ -336,7 +336,6 @@ def edge_exists(
         query += "\nRETURN r"

         # Run the Cypher query
-        print("\n ======> query: ", query)
         result = self.client.execute(query)
         return result.one_or_none().values() is not None

@@ -661,7 +660,40 @@ def get_by_metadata(self, filters: list[dict[str, Any]]) -> list[str]:
         - Supports structured querying such as tag/category/importance/time filtering.
         - Can be used for faceted recall or prefiltering before embedding rerank.
         """
-        raise NotImplementedError
+        where_clauses = []
+        for _i, f in enumerate(filters):
+            field = f["field"]
+            op = f.get("op", "=")
+            value = f["value"]
+
+            # Build WHERE clause
+            if op == "=":
+                where_clauses.append(f"n.{field} = {value}")
+            elif op == "in":
+                where_clauses.append(f"n.{field} IN {value}")
+            elif op == "contains":
+                where_clauses.append(f"ANY(x IN {value} WHERE x IN n.{field})")
+            elif op == "starts_with":
+                where_clauses.append(f"n.{field} STARTS WITH {value}")
+            elif op == "ends_with":
+                where_clauses.append(f"n.{field} ENDS WITH {value}")
+            elif op in [">", ">=", "<", "<="]:
+                where_clauses.append(f"n.{field} {op} {value}")
+            else:
+                raise ValueError(f"Unsupported operator: {op}")
+
+        if not self.config.use_multi_db and self.config.user_name:
+            where_clauses.append(f"n.user_name = '{self.config.user_name}'")
+
+        where_str = " AND ".join(where_clauses)
+        query = f"MATCH (n@Memory) WHERE {where_str} RETURN n.id AS id"
+
+        try:
+            print("\n==========> query:\n", query)
+            result = self.client.execute(query)
+            return [record["id"].value for record in result]
+        except Exception as e:
+            logger.error(f"Failed to get metadata: {e}")

     def get_grouped_counts(
         self,
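For readers skimming the diff, the standalone sketch below (not part of the commit) mirrors how the new `get_by_metadata` turns a filter list into a gGQL WHERE clause. The helper name `build_where` and the sample filters are illustrative only; as in the tests further down, quoting of string values is left to the caller.

```python
# Illustrative sketch: reproduces the WHERE-clause assembly from get_by_metadata
# so you can see the statement it would send for a given filter list.
def build_where(filters: list[dict]) -> str:
    clauses = []
    for f in filters:
        field, op, value = f["field"], f.get("op", "="), f["value"]
        if op == "=":
            clauses.append(f"n.{field} = {value}")
        elif op == "in":
            clauses.append(f"n.{field} IN {value}")
        elif op == "contains":
            clauses.append(f"ANY(x IN {value} WHERE x IN n.{field})")
        elif op in (">", ">=", "<", "<="):
            clauses.append(f"n.{field} {op} {value}")
        else:
            raise ValueError(f"Unsupported operator: {op}")
    return " AND ".join(clauses)

filters = [
    {"field": "key", "op": "=", "value": '"AI Science"'},
    {"field": "confidence", "op": ">=", "value": 90.0},
]
print(f"MATCH (n@Memory) WHERE {build_where(filters)} RETURN n.id AS id")
# MATCH (n@Memory) WHERE n.key = "AI Science" AND n.confidence >= 90.0 RETURN n.id AS id
```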
@@ -827,7 +859,6 @@ def import_graph(self, data: dict[str, Any]) -> None:
             '''
             self.client.execute(edge_gql)

-    # TODO
     def get_all_memory_items(self, scope: str) -> list[dict]:
         """
         Retrieve all memory items of a specific memory_type.
@@ -838,7 +869,24 @@ def get_all_memory_items(self, scope: str) -> list[dict]:
         Returns:
             list[dict]: Full list of memory items under this scope.
         """
-        raise NotImplementedError
+        if scope not in {"WorkingMemory", "LongTermMemory", "UserMemory"}:
+            raise ValueError(f"Unsupported memory type scope: {scope}")
+
+        where_clause = f"WHERE n.memory_type = '{scope}'"
+
+        if not self.config.use_multi_db and self.config.user_name:
+            where_clause += f" AND n.user_name = '{self.config.user_name}'"
+
+        query = f"""
+        MATCH (n@Memory)
+        {where_clause}
+        RETURN n
+        """
+        try:
+            results = self.client.execute(query)
+            return [self._parse_node(record["n"]) for record in results]
+        except Exception as e:
+            logger.error(f"Failed to get memories: {e}")

     # TODO
     def get_structure_optimization_candidates(self, scope: str) -> list[dict]:
@@ -847,16 +895,35 @@ def get_structure_optimization_candidates(self, scope: str) -> list[dict]:
         - Isolated nodes, nodes with empty background, or nodes with exactly one child.
         - Plus: the child of any parent node that has exactly one child.
         """
-        raise NotImplementedError
+        where_clause = f"""
+            WHERE n.memory_type = '{scope}'
+            AND n.status = 'activated'
+            AND NOT ( (n)-[r@PARENT]->() OR ()-[r@PARENT]->(n) )
+        """
+
+        if not self.config.use_multi_db and self.config.user_name:
+            where_clause += f" AND n.user_name = '{self.config.user_name}'"
+
+        query = f"""
+        MATCH (n@Memory)
+        {where_clause}
+        RETURN n.id AS id, n AS node
+        """
+        try:
+            results = self.client.execute(query)
+            return [
+                self._parse_node({"id": record["id"], **dict(record["node"])}) for record in results
+            ]
+        except Exception as e:
+            logger.error(f"Failed : {e}")

-    # TODO
     def drop_database(self) -> None:
         """
         Permanently delete the entire database this instance is using.
         WARNING: This operation is destructive and cannot be undone.
         """
         if self.config.use_multi_db:
-            self.client.execute(f"DROP DATABASE {self.db_name} IF EXISTS")
+            self.client.execute(f"DROP GRAPH {self.db_name}")
             logger.info(f"Database '{self.db_name}' has been dropped.")
         else:
             raise ValueError(
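As a quick illustration (also not part of the commit), the sketch below assembles roughly the statement the new `get_structure_optimization_candidates` issues when `use_multi_db` is false and a user name is configured. The scope value and the user name `alice` are made-up example inputs.

```python
# Illustrative only: approximate query built by get_structure_optimization_candidates
# for scope="LongTermMemory" in single-database mode with user_name="alice".
scope, user_name = "LongTermMemory", "alice"

where_clause = f"""
    WHERE n.memory_type = '{scope}'
    AND n.status = 'activated'
    AND NOT ( (n)-[r@PARENT]->() OR ()-[r@PARENT]->(n) )
"""
# User scoping is appended when use_multi_db is False and a user_name is set.
where_clause += f" AND n.user_name = '{user_name}'"

query = f"""
MATCH (n@Memory)
{where_clause}
RETURN n.id AS id, n AS node
"""
print(query)
```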
@@ -992,7 +1059,6 @@ def _create_basic_property_indexes(self) -> None:
         """
         raise NotImplementedError

-    # TODO
     def _index_exists(self, index_name: str) -> bool:
         """
         Check if an index with the given name exists.

tests/graph_dbs/test_nebular.py

Lines changed: 185 additions & 1 deletion
@@ -29,7 +29,7 @@
     "hosts": json.loads(os.getenv("NEBULAR_HOSTS", "localhost")),
     "user_name": os.getenv("NEBULAR_USER", "root"),
     "password": os.getenv("NEBULAR_PASSWORD", "xxxxxx"),
-    "space": "test_memory_count",
+    "space": "memory_graph",
     "auto_create": True,
     "embedding_dimension": 3072,
     "use_multi_db": False,
@@ -224,3 +224,187 @@ def test_get_edges():
     assert edges[0]["from"] == source.id
     assert edges[0]["to"] == target.id
     assert edges[0]["type"] == "PARENT"
+
+
+def test_get_all_memory_items():
+    graph = GraphStoreFactory.from_config(
+        GraphDBConfigFactory(
+            backend="nebular",
+            config=nebular_config,
+        )
+    )
+    graph.clear()
+
+    # Insert 2 WorkingMemory items
+    for i in range(2):
+        mem = TextualMemoryItem(
+            memory=f"Memory {i}",
+            metadata=TreeNodeTextualMemoryMetadata(
+                memory_type="WorkingMemory",
+                key="Research Topic",
+                hierarchy_level="topic",
+                type="fact",
+                memory_time="2024-01-01",
+                status="activated",
+                visibility="public",
+                updated_at=now,
+                embedding=embed_memory_item(f"Memory {i}"),
+            ),
+        )
+        graph.add_node(mem.id, mem.memory, mem.metadata.model_dump(exclude_none=True))
+
+    # Retrieve all memory items of type WorkingMemory
+    items = graph.get_all_memory_items("WorkingMemory")
+    assert len(items) == 2
+    assert all(item["properties"]["memory_type"] == "WorkingMemory" for item in items)
+
+
+def test_get_structure_optimization_candidates():
+    graph = GraphStoreFactory.from_config(
+        GraphDBConfigFactory(
+            backend="nebular",
+            config=nebular_config,
+        )
+    )
+    graph.clear()
+
+    # Insert one isolated node (no parent or child)
+    mem = TextualMemoryItem(
+        memory="Isolated memory",
+        metadata=TreeNodeTextualMemoryMetadata(
+            memory_type="LongTermMemory",
+            key="Research Topic",
+            hierarchy_level="topic",
+            type="fact",
+            memory_time="2024-01-01",
+            status="activated",
+            visibility="public",
+            updated_at=now,
+            embedding=embed_memory_item("Isolated memory"),
+        ),
+    )
+    graph.add_node(mem.id, mem.memory, mem.metadata.model_dump(exclude_none=True))
+
+    # Insert one node with empty background (and no edges)
+    mem2 = TextualMemoryItem(
+        memory="Empty background memory",
+        metadata=TreeNodeTextualMemoryMetadata(
+            memory_type="LongTermMemory",
+            key="Research Topic",
+            hierarchy_level="topic",
+            type="fact",
+            memory_time="2024-01-01",
+            status="activated",
+            visibility="public",
+            updated_at=now,
+            embedding=embed_memory_item("Empty background memory"),
+        ),
+    )
+    graph.add_node(mem2.id, mem2.memory, mem2.metadata.model_dump(exclude_none=True))
+
+    # Find optimization candidates
+    candidates = graph.get_structure_optimization_candidates("LongTermMemory")
+    print("Optimization candidates:", candidates)
+    assert any("Isolated memory" in c["memory"] for c in candidates)
+    assert any("Empty background memory" in c["memory"] for c in candidates)
+
+
+def test_drop_database():
+    config = GraphDBConfigFactory(
+        backend="nebular",
+        config=nebular_config,
+    )
+    graph = GraphStoreFactory.from_config(config)
+
+    # Create a dummy node
+    mem = TextualMemoryItem(
+        memory="Temp for drop DB",
+        metadata=TreeNodeTextualMemoryMetadata(
+            memory_type="LongTermMemory",
+            key="Research Topic",
+            hierarchy_level="topic",
+            type="fact",
+            memory_time="2024-01-01",
+            status="activated",
+            visibility="public",
+            updated_at=now,
+            embedding=embed_memory_item("Temp for drop DB"),
+        ),
+    )
+    graph.add_node(mem.id, mem.memory, mem.metadata.model_dump(exclude_none=True))
+
+    # Drop the database
+    graph.drop_database()
+
+    # Attempting any operation afterward should raise an error or fail (optional)
+    try:
+        _ = graph.get_all_memory_items("WorkingMemory")
+    except Exception as e:
+        print("Expected exception after DB drop:", str(e))
+        assert "Current working graph not found" in str(e)
+
+
+def test_get_by_metadata():
+    config = GraphDBConfigFactory(
+        backend="nebular",
+        config=nebular_config,
+    )
+    graph = GraphStoreFactory.from_config(config)
+    graph.clear()
+
+    mem1 = TextualMemoryItem(
+        memory="AI for science",
+        metadata=TreeNodeTextualMemoryMetadata(
+            memory_type="LongTermMemory",
+            key="AI Science",
+            confidence=92.5,
+            tags=["AI", "science"],
+            hierarchy_level="topic",
+            type="fact",
+            memory_time="2024-01-01",
+            status="activated",
+            visibility="public",
+            updated_at=now,
+            embedding=embed_memory_item("AI for science"),
+        ),
+    )
+    mem2 = TextualMemoryItem(
+        memory="Neurosymbolic reasoning",
+        metadata=TreeNodeTextualMemoryMetadata(
+            memory_type="LongTermMemory",
+            key="Neurosymbolic",
+            tags=["symbolic", "reasoning"],
+            confidence=88.0,
+            hierarchy_level="topic",
+            type="fact",
+            memory_time="2024-01-01",
+            status="activated",
+            visibility="public",
+            updated_at=now,
+            embedding=embed_memory_item("Neurosymbolic reasoning"),
+        ),
+    )
+    graph.add_node(mem1.id, mem1.memory, mem1.metadata.model_dump(exclude_none=True))
+    graph.add_node(mem2.id, mem2.memory, mem2.metadata.model_dump(exclude_none=True))
+
+    # Exact match filter
+    result_ids = graph.get_by_metadata([{"field": "key", "op": "=", "value": '"AI Science"'}])
+    assert mem1.id in result_ids
+    assert mem2.id not in result_ids
+
+    # Confidence filter
+    result_ids = graph.get_by_metadata([{"field": "confidence", "op": ">=", "value": 90.0}])
+    assert mem1.id in result_ids
+    assert mem2.id not in result_ids
+
+    # Tag contains filter TODO
+    result_ids = graph.get_by_metadata([{"field": "tags", "op": "contains", "value": '["AI"]'}])
+    assert mem1.id in result_ids
+    assert mem2.id not in result_ids
+
+    # In set filter
+    result_ids = graph.get_by_metadata(
+        [{"field": "key", "op": "in", "value": '["AI Science", "Neurosymbolic"]'}]
+    )
+    assert mem1.id in result_ids
+    assert mem2.id in result_ids
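To try the new tests against a NebulaGraph deployment, a minimal sketch is shown below. It assumes a reachable instance; the host/port, password, and the exact JSON shape of `NEBULAR_HOSTS` are placeholders you would adapt to your setup (the test config parses that variable with `json.loads`).

```python
# Sketch: set the connection variables read by tests/graph_dbs/test_nebular.py,
# then run only the tests added in this commit. Values here are placeholders.
import os
import pytest

os.environ["NEBULAR_HOSTS"] = '["127.0.0.1:9669"]'  # must be valid JSON for json.loads
os.environ["NEBULAR_USER"] = "root"
os.environ["NEBULAR_PASSWORD"] = "your-password"

pytest.main([
    "tests/graph_dbs/test_nebular.py",
    "-k", "get_all_memory_items or structure_optimization or drop_database or get_by_metadata",
    "-v",
])
```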
