add: pool graph db

fridayL · fridayL · commit 991c887f2c4f · 2025-07-25T07:01:59.000Z
diff --git a/examples/mem_os/connection_comparison.py b/examples/mem_os/connection_comparison.py
@@ -0,0 +1,81 @@
+"""
+Comparison: Regular vs Pooled Neo4j connections.
+
+This script demonstrates the difference in connection management
+between regular Neo4j backend and the pooled version.
+"""
+
+from memos.configs.graph_db import GraphDBConfigFactory
+from memos.graph_dbs.connection_pool import connection_pool
+from memos.graph_dbs.factory import GraphStoreFactory
+
+
+def create_graph_instance(backend: str, user_id: str):
+    """Create a graph database instance with specified backend."""
+    config = GraphDBConfigFactory(
+        backend=backend,
+        config={
+            "uri": "bolt://localhost:7687",
+            "user": "neo4j",
+            "password": "12345678",
+            "db_name": "test_comparison",
+            "user_name": f"user_{user_id}",
+            "use_multi_db": False,
+            "auto_create": False,  # Skip auto-creation for demo
+            "embedding_dimension": 768,
+        },
+    )
+    return GraphStoreFactory.from_config(config)
+
+
+def demo_regular_connections():
+    """Demonstrate regular Neo4j connections (each instance creates own connection)."""
+    print("\n=== Regular Neo4j Backend ===")
+    instances = []
+
+    for i in range(3):
+        print(f"Creating instance {i + 1}...")
+        instance = create_graph_instance("neo4j", f"user_{i}")
+        instances.append(instance)
+        print(f"Instance {i + 1} created with separate connection")
+
+    print(f"Total instances created: {len(instances)}")
+    print("Note: Each instance has its own database connection")
+
+
+def demo_pooled_connections():
+    """Demonstrate pooled Neo4j connections (shared connection pool)."""
+    print("\n=== Neo4j Pooled Backend ===")
+    print(f"Initial pool connections: {connection_pool.get_active_connections()}")
+
+    instances = []
+
+    for i in range(3):
+        print(f"Creating instance {i + 1}...")
+        instance = create_graph_instance("neo4j-pooled", f"user_{i}")
+        instances.append(instance)
+        print(f"Pool connections: {connection_pool.get_active_connections()}")
+
+    print(f"Total instances created: {len(instances)}")
+    print(f"Shared connections in pool: {connection_pool.get_active_connections()}")
+    print("Note: All instances share the same database connection!")
+
+
+def main():
+    """Run the comparison demo."""
+    print("=== Neo4j Connection Management Comparison ===")
+
+    # Demo regular connections
+    demo_regular_connections()
+
+    # Demo pooled connections
+    demo_pooled_connections()
+
+    print("\n=== Summary ===")
+    print("• Regular backend: Each instance = 1 connection")
+    print("• Pooled backend: Multiple instances = 1 shared connection")
+    print("• Pooled version reduces connection overhead for multi-user scenarios")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/mem_os/pooled_connection_example.py b/examples/mem_os/pooled_connection_example.py
@@ -0,0 +1,104 @@
+"""
+Example: Using Neo4j connection pooling to reduce connection overhead.
+
+This example demonstrates how to use the neo4j-pooled backend to share
+database connections across multiple users/memory instances.
+"""
+
+from memos.configs.mem_cube import GeneralMemCubeConfig
+from memos.graph_dbs.connection_pool import connection_pool
+from memos.mem_cube.general import GeneralMemCube
+
+
+def create_user_cube(user_id: str, openai_api_key: str) -> GeneralMemCube:
+    """Create a memory cube for a user using pooled connections."""
+
+    config = GeneralMemCubeConfig(
+        cube_id=f"user_{user_id}",
+        text_mem={
+            "backend": "tree_text",
+            "config": {
+                "extractor_llm": {
+                    "backend": "openai",
+                    "config": {
+                        "api_key": openai_api_key,
+                        "model_name": "gpt-4o-mini",
+                    },
+                },
+                "dispatcher_llm": {
+                    "backend": "openai",
+                    "config": {
+                        "api_key": openai_api_key,
+                        "model_name": "gpt-4o-mini",
+                    },
+                },
+                "graph_db": {
+                    "backend": "neo4j-pooled",  # Use pooled version
+                    "config": {
+                        "uri": "bolt://localhost:7687",
+                        "user": "neo4j",
+                        "password": "12345678",
+                        "db_name": "shared_memos",
+                        "user_name": f"user_{user_id}",
+                        "use_multi_db": False,
+                        "auto_create": True,
+                        "embedding_dimension": 3072,
+                    },
+                },
+                "embedder": {
+                    "backend": "sentence_transformer",
+                    "config": {"model_name_or_path": "sentence-transformers/all-mpnet-base-v2"},
+                },
+                "reorganize": False,
+            },
+        },
+    )
+
+    return GeneralMemCube(config)
+
+
+def main():
+    """Demonstrate connection pooling with multiple users."""
+
+    # Replace with your actual OpenAI API key
+    openai_api_key = "your-openai-api-key-here"
+
+    print("=== Neo4j Connection Pooling Demo ===")
+    print(f"Initial connections: {connection_pool.get_active_connections()}")
+
+    # Create multiple user cubes
+    users = ["alice", "bob", "charlie"]
+    cubes = {}
+
+    for user_id in users:
+        print(f"\nCreating cube for user: {user_id}")
+        cubes[user_id] = create_user_cube(user_id, openai_api_key)
+        print(f"Active connections: {connection_pool.get_active_connections()}")
+
+    # Add some memories for each user
+    memories = {
+        "alice": "Alice loves hiking in the mountains.",
+        "bob": "Bob is a software engineer who enjoys cooking.",
+        "charlie": "Charlie plays guitar and loves jazz music.",
+    }
+
+    print("\n=== Adding memories ===")
+    for user_id, memory in memories.items():
+        if cubes[user_id].text_mem:
+            cubes[user_id].text_mem.add(memory)
+            print(f"Added memory for {user_id}")
+
+    # Search memories
+    print("\n=== Searching memories ===")
+    for user_id in users:
+        if cubes[user_id].text_mem:
+            results = cubes[user_id].text_mem.search("hobbies", top_k=1)
+            if results:
+                print(f"{user_id}'s memory: {results[0].memory}")
+
+    print(f"\nFinal active connections: {connection_pool.get_active_connections()}")
+    print("Note: All users share the same database connection!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/memos/graph_dbs/connection_pool.py b/src/memos/graph_dbs/connection_pool.py
@@ -0,0 +1,81 @@
+"""Connection pool manager for graph databases."""
+
+import threading
+
+from typing import Any
+
+from memos.log import get_logger
+
+
+logger = get_logger(__name__)
+
+
+class Neo4jConnectionPool:
+    """Singleton connection pool for Neo4j databases."""
+
+    _instance = None
+    _lock = threading.Lock()
+
+    def __new__(cls):
+        if cls._instance is None:
+            with cls._lock:
+                if cls._instance is None:
+                    cls._instance = super().__new__(cls)
+                    cls._instance._initialized = False
+        return cls._instance
+
+    def __init__(self):
+        if not getattr(self, "_initialized", False):
+            self._drivers: dict[str, Any] = {}
+            self._driver_lock = threading.Lock()
+            self._initialized = True
+
+    def get_driver(self, uri: str, user: str, password: str):
+        """Get or create a driver for the given connection parameters."""
+        connection_key = f"{uri}:{user}"
+
+        if connection_key not in self._drivers:
+            with self._driver_lock:
+                if connection_key not in self._drivers:
+                    from neo4j import GraphDatabase
+
+                    driver = GraphDatabase.driver(uri, auth=(user, password))
+                    self._drivers[connection_key] = driver
+                    logger.info(f"Created new Neo4j driver for {connection_key}")
+                else:
+                    logger.debug(f"Using existing Neo4j driver for {connection_key}")
+        else:
+            logger.debug(f"Reusing existing Neo4j driver for {connection_key}")
+
+        return self._drivers[connection_key]
+
+    def close_all(self):
+        """Close all connections in the pool."""
+        with self._driver_lock:
+            for connection_key, driver in self._drivers.items():
+                try:
+                    driver.close()
+                    logger.info(f"Closed Neo4j driver for {connection_key}")
+                except Exception as e:
+                    logger.error(f"Error closing driver for {connection_key}: {e}")
+            self._drivers.clear()
+
+    def close_driver(self, uri: str, user: str):
+        """Close a specific driver."""
+        connection_key = f"{uri}:{user}"
+        with self._driver_lock:
+            if connection_key in self._drivers:
+                try:
+                    self._drivers[connection_key].close()
+                    del self._drivers[connection_key]
+                    logger.info(f"Closed and removed Neo4j driver for {connection_key}")
+                except Exception as e:
+                    logger.error(f"Error closing driver for {connection_key}: {e}")
+
+    def get_active_connections(self) -> int:
+        """Get the number of active connections."""
+        return len(self._drivers)
+
+
+# Global connection pool instance
+connection_pool = Neo4jConnectionPool()
diff --git a/src/memos/graph_dbs/factory.py b/src/memos/graph_dbs/factory.py
@@ -4,13 +4,15 @@
 from memos.graph_dbs.base import BaseGraphDB
 from memos.graph_dbs.neo4j import Neo4jGraphDB
 from memos.graph_dbs.neo4j_community import Neo4jCommunityGraphDB
+from memos.graph_dbs.neo4j_pooled import Neo4jPooledGraphDB
 
 
 class GraphStoreFactory(BaseGraphDB):
     """Factory for creating graph store instances."""
 
     backend_to_class: ClassVar[dict[str, Any]] = {
         "neo4j": Neo4jGraphDB,
+        "neo4j-pooled": Neo4jPooledGraphDB,
         "neo4j-community": Neo4jCommunityGraphDB,
     }
 
diff --git a/src/memos/graph_dbs/neo4j_pooled.py b/src/memos/graph_dbs/neo4j_pooled.py
@@ -0,0 +1,60 @@
+"""Neo4j GraphDB implementation with connection pooling."""
+
+from memos.configs.graph_db import Neo4jGraphDBConfig
+from memos.dependency import require_python_package
+from memos.graph_dbs.connection_pool import connection_pool
+from memos.graph_dbs.neo4j import Neo4jGraphDB
+from memos.log import get_logger
+
+
+logger = get_logger(__name__)
+
+
+class Neo4jPooledGraphDB(Neo4jGraphDB):
+    """Neo4j-based implementation with connection pooling to reduce connection overhead."""
+
+    @require_python_package(
+        import_name="neo4j",
+        install_command="pip install neo4j",
+        install_link="https://neo4j.com/docs/python-manual/current/install/",
+    )
+    def __init__(self, config: Neo4jGraphDBConfig):
+        """Neo4j-based implementation with connection pooling.
+
+        This implementation uses a shared connection pool to reuse database connections
+        across multiple instances, reducing the overhead of creating new connections
+        for each user.
+
+        Tenant Modes:
+        - use_multi_db = True:
+            Dedicated Database Mode (Multi-Database Multi-Tenant).
+            Each tenant or logical scope uses a separate Neo4j database.
+            `db_name` is the specific tenant database.
+            `user_name` can be None (optional).
+
+        - use_multi_db = False:
+            Shared Database Multi-Tenant Mode.
+            All tenants share a single Neo4j database.
+            `db_name` is the shared database.
+            `user_name` is required to isolate each tenant's data at the node level.
+            All node queries will enforce `user_name` in WHERE conditions and store it in metadata,
+            but it will be removed automatically before returning to external consumers.
+        """
+        self.config = config
+
+        # Use connection pool instead of creating new driver
+        self.driver = connection_pool.get_driver(config.uri, config.user, config.password)
+        self.db_name = config.db_name
+        self.user_name = config.user_name
+
+        self.system_db_name = "system" if config.use_multi_db else config.db_name
+        if config.auto_create:
+            self._ensure_database_exists()
+
+        # Create only if not exists
+        self.create_index(dimensions=config.embedding_dimension)
+
+        logger.debug(
+            f"Neo4jPooledGraphDB initialized for {config.uri}:{config.user}, "
+            f"total active connections: {connection_pool.get_active_connections()}"
+        )
diff --git a/tests/graph_dbs/test_neo4j_pooled.py b/tests/graph_dbs/test_neo4j_pooled.py