from typing import Any

try:
    from sqlalchemy import (
        Column,
        ColumnElement,
        Integer,
        MetaData,
        String,
        Table,
        create_engine,
        delete,
        desc,
        func,
        select,
    )
    from sqlalchemy.dialects.postgresql import JSONB
    from sqlalchemy.dialects.postgresql import insert as pg_insert
    from sqlalchemy.engine import Engine
except ImportError as exc:
    raise ImportError(
        'Run `pip install sqlalchemy "psycopg[binary]"` to use the Postgres backend.'
    ) from exc

from memstate.backends.base import StorageBackend


class PostgresStorage(StorageBackend):
    def __init__(self, engine_or_url: str | Engine, table_prefix: str = "memstate") -> None:
        if isinstance(engine_or_url, str):
            self._engine = create_engine(engine_or_url, future=True)
        else:
            self._engine = engine_or_url

        self._metadata = MetaData()
        self._table_prefix = table_prefix

        # --- Define tables ---
        self._facts_table = Table(
            f"{table_prefix}_facts",
            self._metadata,
            Column("id", String, primary_key=True),
            Column("doc", JSONB, nullable=False),  # JSONB so the document is indexable
        )

        self._log_table = Table(
            f"{table_prefix}_log",
            self._metadata,
            Column("seq", Integer, primary_key=True, autoincrement=True),
            Column("entry", JSONB, nullable=False),
        )

        with self._engine.begin() as conn:
            self._metadata.create_all(conn)

    def load(self, id: str) -> dict[str, Any] | None:
        with self._engine.connect() as conn:
            stmt = select(self._facts_table.c.doc).where(self._facts_table.c.id == id)
            row = conn.execute(stmt).first()
            if row:
                return row[0]  # SQLAlchemy deserializes JSONB automatically
            return None

    def save(self, fact_data: dict[str, Any]) -> None:
        # Postgres native upsert: INSERT ... ON CONFLICT DO UPDATE.
        stmt = pg_insert(self._facts_table).values(id=fact_data["id"], doc=fact_data)
        upsert_stmt = stmt.on_conflict_do_update(
            index_elements=["id"],  # conflict on the primary key
            set_={"doc": stmt.excluded.doc},
        )
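        # For reference, the statement above compiles roughly to (a sketch;
        # exact parameter rendering depends on the driver):
        #   INSERT INTO memstate_facts (id, doc) VALUES (%(id)s, %(doc)s)
        #   ON CONFLICT (id) DO UPDATE SET doc = excluded.doc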

        with self._engine.begin() as conn:
            conn.execute(upsert_stmt)

    def delete(self, id: str) -> None:
        with self._engine.begin() as conn:
            conn.execute(delete(self._facts_table).where(self._facts_table.c.id == id))

    def query(
        self,
        type_filter: str | None = None,
        json_filters: dict[str, Any] | None = None,
    ) -> list[dict[str, Any]]:
        stmt = select(self._facts_table.c.doc)

        # 1. Filter by fact type (Postgres JSONB access: doc->>'type').
        if type_filter:
            stmt = stmt.where(self._facts_table.c.doc["type"].astext == type_filter)

        # 2. JSON filters. Keys are dotted paths such as "payload.user.id".
        if json_filters:
            for key, value in json_filters.items():
                # Split the path: "payload.role" -> ["payload", "role"].
                path_parts = key.split(".")

                # Build the JSONB access chain, descending to the parent
                # of the final key.
                json_col: ColumnElement[Any] = self._facts_table.c.doc
                for part in path_parts[:-1]:
                    json_col = json_col[part]

                # Compare on the final key. Wrapping the Python value in
                # to_jsonb() makes int/bool/str comparisons work uniformly,
                # since both sides are then JSONB. The @> (containment)
                # operator would be a more robust alternative.
                stmt = stmt.where(json_col[path_parts[-1]] == func.to_jsonb(value))

        with self._engine.connect() as conn:
            rows = conn.execute(stmt).all()
            return [r[0] for r in rows]
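
    # For illustration: with json_filters={"payload.role": "admin"}, the
    # statement compiles roughly to (a sketch; SQLAlchemy binds the path
    # keys and the value as parameters):
    #   SELECT doc FROM memstate_facts
    #   WHERE doc -> 'payload' -> 'role' = to_jsonb('admin'::TEXT)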

    def append_tx(self, tx_data: dict[str, Any]) -> None:
        with self._engine.begin() as conn:
            conn.execute(self._log_table.insert().values(entry=tx_data))

    def get_tx_log(self, limit: int = 100, offset: int = 0) -> list[dict[str, Any]]:
        stmt = (
            select(self._log_table.c.entry)
            .order_by(desc(self._log_table.c.seq))
            .limit(limit)
            .offset(offset)
        )
        with self._engine.connect() as conn:
            rows = conn.execute(stmt).all()
            return [r[0] for r in rows]

    def delete_session(self, session_id: str) -> list[str]:
        # 1. Find the IDs to delete: WHERE doc->>'session_id' = :session_id
        find_stmt = select(self._facts_table.c.id).where(
            self._facts_table.c.doc["session_id"].astext == session_id
        )

        with self._engine.connect() as conn:
            ids_to_delete = [r[0] for r in conn.execute(find_stmt).all()]

        if not ids_to_delete:
            return []

        # 2. Delete them in a single statement.
        del_stmt = delete(self._facts_table).where(self._facts_table.c.id.in_(ids_to_delete))
        with self._engine.begin() as conn:
            conn.execute(del_stmt)

        return ids_to_delete
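

# Minimal usage sketch (assumes a reachable Postgres at the DSN below and an
# installed driver; the fact shape is illustrative, since the backend itself
# only requires the "id" key):
#
#   storage = PostgresStorage("postgresql://user:pass@localhost/memstate")
#   storage.save({"id": "f1", "type": "note", "session_id": "s1",
#                 "payload": {"role": "admin"}})
#   doc = storage.load("f1")
#   admins = storage.query(type_filter="note",
#                          json_filters={"payload.role": "admin"})
#   storage.delete_session("s1")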