Merge branch 'main' into feature/implement-a-simple

abrookins · abrookins · commit 4a8067386d7a · 2025-07-11T14:57:26.000-07:00
diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml
@@ -34,19 +34,15 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.12]  # Not testing with 3.13 at the moment
-        redis-version: ['6.2.6-v9', 'latest']  # 8.0-M03 is not working atm
+        python-version: [3.12]
+        redis-version: ['redis/redis-stack:6.2.6-v9', 'redis:8.0.3', 'redis:latest']
 
     steps:
     - uses: actions/checkout@v3
 
     - name: Set Redis image name
       run: |
-        if [[ "${{ matrix.redis-version }}" == "8.0-M03" ]]; then
-          echo "REDIS_IMAGE=redis:${{ matrix.redis-version }}" >> $GITHUB_ENV
-        else
-          echo "REDIS_IMAGE=redis/redis-stack-server:${{ matrix.redis-version }}" >> $GITHUB_ENV
-        fi
+          echo "REDIS_IMAGE=${{ matrix.redis-version }}" >> $GITHUB_ENV
 
     - name: Set up Python
       uses: actions/setup-python@v4
@@ -81,6 +77,12 @@ jobs:
     - name: Set up Docker Buildx
       uses: docker/setup-buildx-action@v3
 
+    - name: Log in to Docker Hub
+      uses: docker/login-action@v3
+      with:
+        username: ${{ secrets.DOCKER_USERNAME }}
+        password: ${{ secrets.DOCKER_TOKEN }}
+
     - name: Log in to GitHub Container Registry
       uses: docker/login-action@v3
       with:
@@ -103,6 +105,8 @@ jobs:
         platforms: linux/amd64,linux/arm64
         push: true
         tags: |
+          andrewbrookins510/agent-memory-server:latest
+          andrewbrookins510/agent-memory-server:${{ steps.version.outputs.version }}
           ghcr.io/${{ github.repository }}:latest
           ghcr.io/${{ github.repository }}:${{ steps.version.outputs.version }}
         cache-from: type=gha
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,62 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [0.9.0] - 2025-07-11
+
+*Changes from the initial release:*
+
+### Architecture Evolution
+- **Working Memory (formerly Short-term Memory)**:
+  - Renamed from "short-term memory" to "working memory" to better reflect its purpose
+  - Enhanced with automatic promotion system that moves structured memories to long-term storage in background
+  - Added support for arbitrary JSON data storage alongside memory structures
+  - Improved automatic conversation summarization in working memory, based on token limits
+
+- **Long-term Memory Promotion**:
+  - Implemented seamless flow from working memory to long-term memory via background task processing
+  - Agent only has to think about working memory; long-term memory is managed automatically (but can be managed manually, too)
+  - Use any LangChain `VectorStore` subclass for long-term storage, defaults to `RedisVectorStore`
+  - Structured memories are automatically promoted with vector embeddings and metadata indexing
+  - Deduplication and compaction systems for long-term memory management
+  - Background task worker system for reliable, scalable memory processing
+
+### Client SDK and Tooling
+  - Working and long-term memory available as tools for LLM integration (LLM can choose to persist a long-term memory or search for long-term memories, etc.)
+  - Higher-level tools support sending in a user's input and getting back a context-enriched prompt, via `/v1/memory/prompt` endpoint
+  - Support for namespace isolation, user separation, and session management
+
+### Search and Retrieval
+  - Vector-based similarity search using OpenAI embeddings
+  - Rich filtering system by session, namespace, topics, entities, timestamps
+  - Hybrid search combining semantic similarity with metadata filtering
+  - RedisVL integration for high-performance vector operations with Redis
+
+### Enhanced Memory Classification
+  - Semantic memories for facts and preferences
+  - Episodic memories for time-bound events with event dates (requires a timeframe)
+  - Message memories for long-term conversation records (optional)
+  - Automatic topic modeling and entity recognition using either BERTopic or a configured LLM
+  - Rich metadata extraction and indexing
+
+### Authentication and Security
+  - OAuth2/JWT Bearer token authentication with JWKS validation
+  - Multi-provider support (Auth0, AWS Cognito, Okta, Azure AD)
+  - Role-based access control using JWT claims
+  - Development mode with configurable auth bypass
+
+### Operational Features
+- **Comprehensive CLI Interface**:
+  - Commands for server management (`api`, `mcp`, `task-worker`)
+  - Database operations (`rebuild-index`)
+  - Background task scheduling and management
+  - Health monitoring and diagnostics
+
+
+## [0.0.1]
+
+### Initial Release - 2025-04-07
+- Initial release with basic short-term and long-term memory functionality
diff --git a/README.md b/README.md
@@ -7,24 +7,15 @@ A Redis-powered memory server built for AI agents and applications. It manages b
 - **Working Memory**
 
   - Session-scoped storage for messages, structured memories, context, and metadata
-  - Automatically summarizes conversations when they exceed a client-configured window size
+  - Automatically summarizes conversations when they exceed a client-configured (or server-managed) window size
   - Supports all major OpenAI and Anthropic models
   - Automatic (background) promotion of structured memories to long-term storage
 
 - **Long-Term Memory**
 
   - Persistent storage for memories across sessions
-  - **Pluggable Vector Store Backends** - Support for multiple vector databases through LangChain VectorStore interface:
-    - **Redis** (default) - RedisStack with RediSearch
-    - **Chroma** - Open-source vector database
-    - **Pinecone** - Managed vector database service
-    - **Weaviate** - Open-source vector search engine
-    - **Qdrant** - Vector similarity search engine
-    - **Milvus** - Cloud-native vector database
-    - **PostgreSQL/PGVector** - PostgreSQL with vector extensions
-    - **LanceDB** - Embedded vector database
-    - **OpenSearch** - Open-source search and analytics suite
-  - Semantic search to retrieve memories with advanced filtering system
+  - Pluggable Vector Store Backends - Support for any LangChain VectorStore (defaults to Redis)
+  - Semantic search to retrieve memories with advanced filtering
   - Filter by session, user ID, namespace, topics, entities, timestamps, and more
   - Supports both exact match and semantic similarity search
   - Automatic topic modeling for stored memories with BERTopic or configured LLM
@@ -63,11 +54,7 @@ This project is under active development and is **pre-release** software. Think
 
 ### Roadmap
 
-- [x] Long-term memory deduplication and compaction
-- [x] Use a background task system instead of `BackgroundTask`
-- [x] Authentication/authorization hooks (OAuth2/JWT support)
-- [ ] Configurable strategy for moving working memory to long-term memory
-- [ ] Separate Redis connections for long-term and working memory
+- [ ] Easier RBAC customization: role definitions, more hooks
 
 ## REST API Endpoints
 
diff --git a/agent-memory-client/agent_memory_client/__init__.py b/agent-memory-client/agent_memory_client/__init__.py
@@ -5,7 +5,7 @@
 memory management capabilities for AI agents and applications.
 """
 
-__version__ = "0.9.0b7"
+__version__ = "0.9.1"
 
 from .client import MemoryAPIClient, MemoryClientConfig, create_memory_client
 from .exceptions import (
diff --git a/agent-memory-client/agent_memory_client/models.py b/agent-memory-client/agent_memory_client/models.py
@@ -183,8 +183,8 @@ class WorkingMemory(BaseModel):
     )
 
     # TTL and timestamps
-    ttl_seconds: int = Field(
-        default=3600,  # 1 hour default
+    ttl_seconds: int | None = Field(
+        default=None,  # Persistent by default
         description="TTL for the working memory in seconds",
     )
     last_accessed: datetime = Field(
diff --git a/agent_memory_server/__init__.py b/agent_memory_server/__init__.py
@@ -1,3 +1,3 @@
 """Redis Agent Memory Server - A memory system for conversational AI."""
 
-__version__ = "0.9.0b7"
+__version__ = "0.9.1"
diff --git a/agent_memory_server/long_term_memory.py b/agent_memory_server/long_term_memory.py
@@ -1013,6 +1013,9 @@ async def deduplicate_by_semantic_search(
 
     vector_search_result = search_result.memories if search_result else []
 
+    # Filter out the memory itself from the search results (avoid self-duplication)
+    vector_search_result = [m for m in vector_search_result if m.id != memory.id]
+
     if vector_search_result and len(vector_search_result) > 0:
         # Found semantically similar memories
         similar_memory_ids = [memory.id for memory in vector_search_result]
@@ -1079,8 +1082,6 @@ async def promote_working_memory_to_long_term(
         logger.debug(f"No working memory found for session {session_id}")
         return 0
 
-    print("Current working memory: ", current_working_memory)
-
     # Find memories with no persisted_at (eligible for promotion)
     unpersisted_memories = [
         memory
diff --git a/agent_memory_server/mcp.py b/agent_memory_server/mcp.py
@@ -137,7 +137,14 @@ async def run_sse_async(self):
 
         redis = await get_redis_conn()
         await ensure_search_index_exists(redis)
-        return await super().run_sse_async()
+
+        # Run the SSE server using our custom implementation
+        import uvicorn
+
+        app = self.sse_app()
+        await uvicorn.Server(
+            uvicorn.Config(app, host="0.0.0.0", port=int(self.settings.port))
+        ).serve()
 
     async def run_stdio_async(self):
         """Ensure Redis search index exists before starting STDIO MCP server."""
diff --git a/agent_memory_server/models.py b/agent_memory_server/models.py
@@ -201,8 +201,8 @@ class WorkingMemory(BaseModel):
     )
 
     # TTL and timestamps
-    ttl_seconds: int = Field(
-        default=3600,  # 1 hour default
+    ttl_seconds: int | None = Field(
+        default=None,  # Persistent by default
         description="TTL for the working memory in seconds",
     )
     last_accessed: datetime = Field(
diff --git a/agent_memory_server/working_memory.py b/agent_memory_server/working_memory.py
@@ -121,7 +121,7 @@ async def get_working_memory(
             tokens=working_memory_data.get("tokens", 0),
             session_id=session_id,
             namespace=namespace,
-            ttl_seconds=working_memory_data.get("ttl_seconds", 3600),
+            ttl_seconds=working_memory_data.get("ttl_seconds", None),
             data=working_memory_data.get("data") or {},
             last_accessed=datetime.fromtimestamp(
                 working_memory_data.get("last_accessed", int(time.time())), UTC
@@ -188,18 +188,24 @@ async def set_working_memory(
     }
 
     try:
-        # Store with TTL
-        await redis_client.setex(
-            key,
-            working_memory.ttl_seconds,
-            json.dumps(
-                data, default=json_datetime_handler
-            ),  # Add custom handler for any remaining datetime objects
-        )
-        logger.info(
-            f"Set working memory for session {working_memory.session_id} with TTL {working_memory.ttl_seconds}s"
-        )
-
+        if working_memory.ttl_seconds is not None:
+            # Store with TTL
+            await redis_client.setex(
+                key,
+                working_memory.ttl_seconds,
+                json.dumps(data, default=json_datetime_handler),
+            )
+            logger.info(
+                f"Set working memory for session {working_memory.session_id} with TTL {working_memory.ttl_seconds}s"
+            )
+        else:
+            await redis_client.set(
+                key,
+                json.dumps(data, default=json_datetime_handler),
+            )
+            logger.info(
+                f"Set working memory for session {working_memory.session_id} with no TTL"
+            )
     except Exception as e:
         logger.error(
             f"Error setting working memory for session {working_memory.session_id}: {e}"
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -34,7 +34,7 @@ services:
       dockerfile: Dockerfile
     environment:
       - REDIS_URL=redis://redis:6379
-      - PORT=9000
+      - PORT=9050
       # Add your API keys here or use a .env file
       - OPENAI_API_KEY=${OPENAI_API_KEY}
       - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
@@ -46,19 +46,41 @@ services:
       - ENABLE_TOPIC_EXTRACTION=True
       - ENABLE_NER=True
     ports:
-      - "9000:9000"
+      - "9050:9000"
     depends_on:
       - redis
     command: ["uv", "run", "agent-memory", "mcp", "--mode", "sse"]
 
+  task-worker:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    environment:
+      - REDIS_URL=redis://redis:6379
+      # Add your API keys here or use a .env file
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
+      # Optional configurations with defaults
+      - LONG_TERM_MEMORY=True
+      - WINDOW_SIZE=20
+      - GENERATION_MODEL=gpt-4o-mini
+      - EMBEDDING_MODEL=text-embedding-3-small
+      - ENABLE_TOPIC_EXTRACTION=True
+      - ENABLE_NER=True
+    depends_on:
+      - redis
+    command: ["uv", "run", "agent-memory", "task-worker"]
+    volumes:
+      - ./agent_memory_server:/app/agent_memory_server
+    restart: unless-stopped
+
   redis:
-    image: redis/redis-stack:latest
+    image: redis:8
     ports:
-      - "16379:6379" # Redis port
-      - "18001:8001" # RedisInsight port
+      - "16380:6379" # Redis port
     volumes:
       - redis_data:/data
-    command: redis-stack-server --save 60 1 --loglevel warning
+    command: redis-server --save "" --loglevel warning --appendonly no --stop-writes-on-bgsave-error no
     healthcheck:
       test: [ "CMD", "redis-cli", "ping" ]
       interval: 30s
diff --git a/examples/memory_prompt_agent.py b/examples/memory_prompt_agent.py
@@ -34,9 +34,13 @@
 
 
 # Configure logging
-logging.basicConfig(level=logging.INFO)
+logging.basicConfig(level=logging.WARNING)
 logger = logging.getLogger(__name__)
 
+# Reduce third-party logging
+logging.getLogger("httpx").setLevel(logging.WARNING)
+logging.getLogger("openai").setLevel(logging.WARNING)
+
 # Environment setup
 MEMORY_SERVER_URL = os.getenv("MEMORY_SERVER_URL", "http://localhost:8000")
 DEFAULT_USER = "demo_user"
@@ -96,7 +100,6 @@ async def cleanup(self):
         """Clean up resources."""
         if self._memory_client:
             await self._memory_client.close()
-            logger.info("Memory client closed")
 
     async def _add_message_to_working_memory(
         self, session_id: str, user_id: str, role: str, content: str
@@ -145,8 +148,6 @@ async def _generate_response(
                 content = content["text"]
             messages.append({"role": msg["role"], "content": str(content)})
 
-        logger.info(f"Total messages for LLM: {len(messages)}")
-
         # Generate response
         response = self.llm.invoke(messages)
         return str(response.content)
diff --git a/examples/travel_agent.py b/examples/travel_agent.py
diff --git a/manual_oauth_qa/README.md b/manual_oauth_qa/README.md
diff --git a/manual_oauth_qa/quick_auth0_setup.sh b/manual_oauth_qa/quick_auth0_setup.sh
diff --git a/tests/conftest.py b/tests/conftest.py
diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml
diff --git a/tests/test_working_memory.py b/tests/test_working_memory.py

Original file line number	Diff line number	Diff line change
`@@ -183,8 +183,8 @@ class WorkingMemory(BaseModel):`
`183`	`183`	`)`
`184`	`184`
`185`	`185`	`# TTL and timestamps`
`186`		`- ttl_seconds: int = Field(`
`187`		`- default=3600, # 1 hour default`
	`186`	`+ ttl_seconds: int \| None = Field(`
	`187`	`+ default=None, # Persistent by default`
`188`	`188`	`description="TTL for the working memory in seconds",`
`189`	`189`	`)`
`190`	`190`	`last_accessed: datetime = Field(`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,3 @@`
`1`	`1`	`"""Redis Agent Memory Server - A memory system for conversational AI."""`
`2`	`2`
`3`		`-__version__ = "0.9.0b7"`
	`3`	`+__version__ = "0.9.1"`
Original file line number	Diff line number	Diff line change
`@@ -201,8 +201,8 @@ class WorkingMemory(BaseModel):`
`201`	`201`	`)`
`202`	`202`
`203`	`203`	`# TTL and timestamps`
`204`		`- ttl_seconds: int = Field(`
`205`		`- default=3600, # 1 hour default`
	`204`	`+ ttl_seconds: int \| None = Field(`
	`205`	`+ default=None, # Persistent by default`
`206`	`206`	`description="TTL for the working memory in seconds",`
`207`	`207`	`)`
`208`	`208`	`last_accessed: datetime = Field(`