improve validation for memory:// urls, add examples to build_context

phernandez · phernandez · commit c5c70cb0f44d · 2025-06-04T00:16:33.000-05:00
Signed-off-by: phernandez <paul@basicmachines.co>
diff --git a/src/basic_memory/mcp/tools/build_context.py b/src/basic_memory/mcp/tools/build_context.py
@@ -13,20 +13,24 @@
     GraphContext,
     MemoryUrl,
     memory_url_path,
-    normalize_memory_url,
 )
 
 
 @mcp.tool(
     description="""Build context from a memory:// URI to continue conversations naturally.
     
     Use this to follow up on previous discussions or explore related topics.
+    
+    Memory URL Format:
+    - Use paths like "folder/note" or "memory://folder/note" 
+    - Pattern matching: "folder/*" matches all notes in folder
+    - Valid characters: letters, numbers, hyphens, underscores, forward slashes
+    - Avoid: double slashes (//), angle brackets (<>), double quotes ("), pipes (|), question marks (?)
+    - Examples: "specs/search", "projects/basic-memory", "notes/*"
+    
     Timeframes support natural language like:
-    - "2 days ago"
-    - "last week" 
-    - "today"
-    - "3 months ago"
-    Or standard formats like "7d", "24h"
+    - "2 days ago", "last week", "today", "3 months ago"
+    - Or standard formats like "7d", "24h"
     """,
 )
 async def build_context(
@@ -76,7 +80,7 @@ async def build_context(
         build_context("memory://specs/search", project="work-project")
     """
     logger.info(f"Building context from {url}")
-    url = normalize_memory_url(url)
+    # URL is already validated and normalized by MemoryUrl type annotation
 
     active_project = get_active_project(project)
     project_url = active_project.project_url
diff --git a/src/basic_memory/mcp/tools/canvas.py b/src/basic_memory/mcp/tools/canvas.py
@@ -35,7 +35,8 @@ async def canvas(
         nodes: List of node objects following JSON Canvas 1.0 spec
         edges: List of edge objects following JSON Canvas 1.0 spec
         title: The title of the canvas (will be saved as title.canvas)
-        folder: The folder where the file should be saved
+        folder: Folder path relative to project root where the canvas should be saved.
+                Use forward slashes (/) as separators. Examples: "diagrams", "projects/2025", "visual/maps"
         project: Optional project name to create canvas in. If not provided, uses current active project.
 
     Returns:
diff --git a/src/basic_memory/mcp/tools/write_note.py b/src/basic_memory/mcp/tools/write_note.py
@@ -54,7 +54,8 @@ async def write_note(
     Args:
         title: The title of the note
         content: Markdown content for the note, can include observations and relations
-        folder: the folder where the file should be saved
+        folder: Folder path relative to project root where the file should be saved.
+                Use forward slashes (/) as separators. Examples: "notes", "projects/2025", "research/ml"
         tags: Tags to categorize the note. Can be a list of strings, a comma-separated string, or None.
               Note: If passing from external MCP clients, use a string format (e.g. "tag1,tag2,tag3")
         project: Optional project name to write to. If not provided, uses current active project.
diff --git a/src/basic_memory/schemas/memory.py b/src/basic_memory/schemas/memory.py
@@ -9,31 +9,88 @@
 from basic_memory.schemas.search import SearchItemType
 
 
+def validate_memory_url_path(path: str) -> bool:
+    """Validate that a memory URL path is well-formed.
+
+    Args:
+        path: The path part of a memory URL (without memory:// prefix)
+
+    Returns:
+        True if the path is valid, False otherwise
+
+    Examples:
+        >>> validate_memory_url_path("specs/search")
+        True
+        >>> validate_memory_url_path("memory//test")  # Double slash
+        False
+        >>> validate_memory_url_path("invalid://test")  # Contains protocol
+        False
+    """
+    if not path or not path.strip():
+        return False
+
+    # Check for invalid protocol schemes within the path first (more specific)
+    if "://" in path:
+        return False
+
+    # Check for double slashes anywhere in the path (a leading "//" is rejected too)
+    if "//" in path:
+        return False
+
+    # Check for invalid characters (excluding * which is used for pattern matching)
+    invalid_chars = {"<", ">", '"', "|", "?"}
+    if any(char in path for char in invalid_chars):
+        return False
+
+    return True
+
+
 def normalize_memory_url(url: str | None) -> str:
-    """Normalize a MemoryUrl string.
+    """Normalize a MemoryUrl string with validation.
 
     Args:
         url: A path like "specs/search" or "memory://specs/search"
 
     Returns:
         Normalized URL starting with memory://
 
+    Raises:
+        ValueError: If the URL path is malformed
+
     Examples:
         >>> normalize_memory_url("specs/search")
         'memory://specs/search'
         >>> normalize_memory_url("memory://specs/search")
         'memory://specs/search'
+        >>> normalize_memory_url("memory//test")
+        Traceback (most recent call last):
+        ...
+        ValueError: Invalid memory URL path: 'memory//test' contains double slashes
     """
     if not url:
         return ""
 
     clean_path = url.removeprefix("memory://")
+
+    # Validate the extracted path
+    if not validate_memory_url_path(clean_path):
+        # Provide specific error messages for common issues
+        if "://" in clean_path:
+            raise ValueError(f"Invalid memory URL path: '{clean_path}' contains protocol scheme")
+        elif "//" in clean_path:
+            raise ValueError(f"Invalid memory URL path: '{clean_path}' contains double slashes")
+        elif not clean_path.strip():
+            raise ValueError("Memory URL path cannot be empty or whitespace")
+        else:
+            raise ValueError(f"Invalid memory URL path: '{clean_path}' contains invalid characters")
+
     return f"memory://{clean_path}"
 
 
 MemoryUrl = Annotated[
     str,
+    BeforeValidator(normalize_memory_url),  # Runs 2nd: Before validators apply right-to-left
     BeforeValidator(str.strip),  # Clean whitespace
     MinLen(1),
     MaxLen(2028),
 ]
diff --git a/test-int/mcp/test_build_context_validation.py b/test-int/mcp/test_build_context_validation.py
@@ -0,0 +1,172 @@
+"""Integration tests for build_context memory URL validation."""
+
+import pytest
+from fastmcp import Client
+
+
+@pytest.mark.asyncio
+async def test_build_context_valid_urls(mcp_server, app):
+    """Test that build_context works with valid memory URLs."""
+
+    async with Client(mcp_server) as client:
+        # Create a test note to ensure we have something to find
+        await client.call_tool(
+            "write_note",
+            {
+                "title": "URL Validation Test",
+                "folder": "testing",
+                "content": "# URL Validation Test\n\nThis note tests URL validation.",
+                "tags": "test,validation",
+            },
+        )
+
+        # Test various valid URL formats
+        valid_urls = [
+            "memory://testing/url-validation-test",  # Full memory URL
+            "testing/url-validation-test",  # Relative path
+            "testing/*",  # Pattern matching
+        ]
+
+        for url in valid_urls:
+            result = await client.call_tool("build_context", {"url": url})
+
+            # Should return a valid GraphContext response
+            assert len(result) == 1
+            response = result[0].text
+            assert '"results"' in response  # Should contain results structure
+            assert '"metadata"' in response  # Should contain metadata
+
+
+@pytest.mark.asyncio
+async def test_build_context_invalid_urls_fail_validation(mcp_server, app):
+    """Test that build_context properly validates and rejects invalid memory URLs."""
+
+    async with Client(mcp_server) as client:
+        # Test cases: (invalid_url, expected_error_fragment)
+        invalid_test_cases = [
+            ("memory//test", "double slashes"),
+            ("invalid://test", "protocol scheme"),
+            ("notes<brackets>", "invalid characters"),
+            ('notes"quotes"', "invalid characters"),
+        ]
+
+        for invalid_url, expected_error in invalid_test_cases:
+            with pytest.raises(Exception) as exc_info:
+                await client.call_tool("build_context", {"url": invalid_url})
+
+            error_message = str(exc_info.value).lower()
+            assert expected_error in error_message, (
+                f"URL '{invalid_url}' should fail with '{expected_error}' error"
+            )
+
+
+@pytest.mark.asyncio
+async def test_build_context_empty_urls_fail_validation(mcp_server, app):
+    """Test that empty or whitespace-only URLs fail validation."""
+
+    async with Client(mcp_server) as client:
+        # These should be rejected as empty (by MinLen or by the custom path validation)
+        empty_urls = [
+            "",  # Empty string
+            "   ",  # Whitespace only
+        ]
+
+        for empty_url in empty_urls:
+            with pytest.raises(Exception) as exc_info:
+                await client.call_tool("build_context", {"url": empty_url})
+
+            error_message = str(exc_info.value)
+            # Should fail with validation error (either MinLen or our custom validation)
+            assert (
+                "at least 1" in error_message
+                or "too_short" in error_message
+                or "empty or whitespace" in error_message
+                or "value_error" in error_message
+            )
+
+
+@pytest.mark.asyncio
+async def test_build_context_nonexistent_urls_return_empty_results(mcp_server, app):
+    """Test that valid but nonexistent URLs return empty results (not errors)."""
+
+    async with Client(mcp_server) as client:
+        # These are valid URL formats but don't exist in the system
+        nonexistent_valid_urls = [
+            "memory://nonexistent/note",
+            "nonexistent/note",
+            "missing/*",
+        ]
+
+        for url in nonexistent_valid_urls:
+            result = await client.call_tool("build_context", {"url": url})
+
+            # Should return valid response with empty results
+            assert len(result) == 1
+            response = result[0].text
+            assert '"results": []' in response  # Empty results
+            assert '"total_results": 0' in response  # Zero count
+            assert '"metadata"' in response  # But should have metadata
+
+
+@pytest.mark.asyncio
+async def test_build_context_error_messages_are_helpful(mcp_server, app):
+    """Test that validation error messages provide helpful guidance."""
+
+    async with Client(mcp_server) as client:
+        # Test double slash error message
+        with pytest.raises(Exception) as exc_info:
+            await client.call_tool("build_context", {"url": "memory//bad"})
+
+        error_msg = str(exc_info.value).lower()
+        # Should contain validation error info
+        assert (
+            "double slashes" in error_msg
+            or "value_error" in error_msg
+            or "validation error" in error_msg
+        )
+
+        # Test protocol scheme error message
+        with pytest.raises(Exception) as exc_info:
+            await client.call_tool("build_context", {"url": "http://example.com"})
+
+        error_msg = str(exc_info.value).lower()
+        assert (
+            "protocol scheme" in error_msg
+            or "protocol" in error_msg
+            or "value_error" in error_msg
+            or "validation error" in error_msg
+        )
+
+
+@pytest.mark.asyncio
+async def test_build_context_pattern_matching_works(mcp_server, app):
+    """Test that valid pattern matching URLs work correctly."""
+
+    async with Client(mcp_server) as client:
+        # Create multiple test notes
+        test_notes = [
+            ("Pattern Test One", "patterns", "# Pattern Test One\n\nFirst pattern test."),
+            ("Pattern Test Two", "patterns", "# Pattern Test Two\n\nSecond pattern test."),
+            ("Other Note", "other", "# Other Note\n\nNot a pattern match."),
+        ]
+
+        for title, folder, content in test_notes:
+            await client.call_tool(
+                "write_note",
+                {
+                    "title": title,
+                    "folder": folder,
+                    "content": content,
+                },
+            )
+
+        # Test pattern matching
+        result = await client.call_tool("build_context", {"url": "patterns/*"})
+
+        assert len(result) == 1
+        response = result[0].text
+
+        # Should find the pattern matches but not the other note
+        assert '"total_results": 2' in response or '"primary_count": 2' in response
+        assert "Pattern Test" in response
+        assert "Other Note" not in response
diff --git a/tests/schemas/test_memory_url_validation.py b/tests/schemas/test_memory_url_validation.py
diff --git a/tests/sync/test_sync_service.py b/tests/sync/test_sync_service.py