Fix Claude Desktop parameter serialization issues with comprehensive testing

IvanBiruk · IvanBiruk · commit 01d4dbed73c9 · 2025-09-22T00:29:51.000+02:00
- Add normalize_data_source_ids() utility function for DRY parameter handling
- Update tool type annotations to accept Union[str, List[str]] for Claude Desktop compatibility
- Handle JSON-encoded strings, plain strings, and proper arrays uniformly
- Add 13 comprehensive tests covering all parameter formats and edge cases
- Refactor both codebase_search and codebase_consultant tools to use shared utility
- Maintain backward compatibility with existing proper array inputs
- All 39 tests pass ensuring robustness

Fixes the "Input validation error" issues when Claude Desktop sends incorrectly
serialized array parameters as JSON strings or plain strings.
diff --git a/src/tests/test_parameter_normalization.py b/src/tests/test_parameter_normalization.py
@@ -0,0 +1,135 @@
+"""Tests for parameter normalization functionality."""
+
+import pytest
+import json
+from utils.errors import normalize_data_source_ids
+
+
+class TestNormalizeDataSourceIds:
+    """Test the normalize_data_source_ids function with various input formats."""
+
+    def test_proper_array_input(self):
+        """Test that proper arrays are passed through unchanged."""
+        input_data = ["repo1", "repo2", "repo3"]
+        result = normalize_data_source_ids(input_data)
+        assert result == ["repo1", "repo2", "repo3"]
+
+    def test_single_string_input(self):
+        """Test that single string is converted to array."""
+        input_data = "repo1"
+        result = normalize_data_source_ids(input_data)
+        assert result == ["repo1"]
+
+    def test_json_encoded_string_input(self):
+        """Test that JSON-encoded strings are properly parsed."""
+        input_data = '["repo1", "repo2"]'
+        result = normalize_data_source_ids(input_data)
+        assert result == ["repo1", "repo2"]
+
+    def test_malformed_json_string_fallback(self):
+        """Test that malformed JSON strings fall back to single ID."""
+        input_data = '["repo1", "repo2"'  # Missing closing bracket
+        result = normalize_data_source_ids(input_data)
+        assert result == ['["repo1", "repo2"']  # Treated as single ID
+
+    def test_empty_inputs(self):
+        """Test that None, an empty string and an empty list all normalize to []."""
+        assert normalize_data_source_ids(None) == []
+        assert normalize_data_source_ids("") == []
+        assert normalize_data_source_ids([]) == []
+
+    def test_mixed_array_with_dicts(self):
+        """Test arrays containing both strings and dict objects."""
+        input_data = [
+            "repo1",
+            {"id": "repo2", "type": "repository"},
+            "repo3",
+            {"id": "workspace1", "type": "workspace"}
+        ]
+        result = normalize_data_source_ids(input_data)
+        assert result == ["repo1", "repo2", "repo3", "workspace1"]
+
+    def test_dict_without_id(self):
+        """Test that dicts without 'id' field are skipped."""
+        input_data = [
+            "repo1",
+            {"name": "some-repo", "type": "repository"},  # No 'id' field
+            "repo2"
+        ]
+        result = normalize_data_source_ids(input_data)
+        assert result == ["repo1", "repo2"]
+
+    def test_empty_strings_preserved(self):
+        """Test that empty and whitespace-only strings in arrays are preserved as-is."""
+        input_data = ["repo1", "", "repo2", "   ", "repo3"]
+        result = normalize_data_source_ids(input_data)
+        assert result == ["repo1", "", "repo2", "   ", "repo3"]  # All strings preserved
+
+    def test_non_list_non_string_input(self):
+        """Test handling of unexpected input types."""
+        result = normalize_data_source_ids(123)
+        assert result == ["123"]
+
+        result = normalize_data_source_ids({"id": "repo1"})
+        assert result == ["{'id': 'repo1'}"]  # a bare (non-list) dict is stringified with str(), not unpacked
+
+    def test_claude_desktop_scenarios(self):
+        """Test specific scenarios from Claude Desktop serialization issues."""
+        # Scenario 1: JSON string as seen in Claude Desktop logs
+        claude_input_1 = '["67db4097fa23c0a98a8495c2"]'
+        result_1 = normalize_data_source_ids(claude_input_1)
+        assert result_1 == ["67db4097fa23c0a98a8495c2"]
+
+        # Scenario 2: Plain string as seen in Claude Desktop logs
+        claude_input_2 = "67db4097fa23c0a98a8495c2"
+        result_2 = normalize_data_source_ids(claude_input_2)
+        assert result_2 == ["67db4097fa23c0a98a8495c2"]
+
+        # Scenario 3: Multiple IDs in JSON string
+        claude_input_3 = '["repo1", "repo2", "workspace1"]'
+        result_3 = normalize_data_source_ids(claude_input_3)
+        assert result_3 == ["repo1", "repo2", "workspace1"]
+
+    def test_edge_cases(self):
+        """Test various edge cases."""
+        # Empty JSON arrays, with and without interior whitespace
+        assert normalize_data_source_ids("[]") == []
+        assert normalize_data_source_ids("[   ]") == []
+
+        # Single item JSON array
+        assert normalize_data_source_ids('["single"]') == ["single"]
+
+        # JSON array with empty strings
+        assert normalize_data_source_ids('["repo1", "", "repo2"]') == ["repo1", "", "repo2"]
+
+
+class TestParameterNormalizationIntegration:
+    """Integration tests pinning the tool signatures that accept str or List[str]."""
+
+    def test_search_tool_parameter_handling(self):
+        """Test that codebase_search's data_source_ids annotation admits a bare str."""
+        from tools.search import codebase_search
+        import typing
+
+        # Resolve the annotation to real types (also works for string annotations)
+        hints = typing.get_type_hints(codebase_search)
+        union_members = typing.get_args(hints['data_source_ids'])
+
+        # A substring check also passes for Optional[List[str]]; require str itself
+        assert str in union_members
+
+    def test_consultant_tool_parameter_handling(self):
+        """Test that codebase_consultant's data_sources annotation admits a bare str."""
+        from tools.chat import codebase_consultant
+        import typing
+
+        # Resolve the annotation to real types (also works for string annotations)
+        hints = typing.get_type_hints(codebase_consultant)
+        union_members = typing.get_args(hints['data_sources'])
+
+        # A substring check also passes for Optional[List[str]]; require str itself
+        assert str in union_members
+
+
+if __name__ == "__main__":
+    raise SystemExit(pytest.main([__file__, "-v"]))  # propagate pytest's exit code to the shell
diff --git a/src/tools/chat.py b/src/tools/chat.py
@@ -1,20 +1,20 @@
 """Chat completions tool implementation."""
 
 import json
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
 from urllib.parse import urljoin
 
 import httpx
 from fastmcp import Context
 
 from core import CodeAliveContext, get_api_key_from_context, log_api_request, log_api_response
-from utils import handle_api_error, format_data_source_ids
+from utils import handle_api_error, format_data_source_ids, normalize_data_source_ids
 
 
 async def codebase_consultant(
     ctx: Context,
     question: str,
-    data_sources: Optional[List[str]] = None,
+    data_sources: Optional[Union[str, List[str]]] = None,
     conversation_id: Optional[str] = None
 ) -> str:
     """
@@ -68,6 +68,9 @@ async def codebase_consultant(
     """
     context: CodeAliveContext = ctx.request_context.lifespan_context
 
+    # Normalize data source IDs (handles Claude Desktop serialization issues)
+    data_sources = normalize_data_source_ids(data_sources)
+
     if not question or not question.strip():
         return "Error: No question provided. Please provide a question to ask the consultant."
 
diff --git a/src/tools/search.py b/src/tools/search.py
@@ -1,19 +1,19 @@
 """Search tool implementation."""
 
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
 from urllib.parse import urljoin
 
 import httpx
 from fastmcp import Context
 
 from core import CodeAliveContext, get_api_key_from_context, log_api_request, log_api_response
-from utils import transform_search_response_to_xml, handle_api_error
+from utils import transform_search_response_to_xml, handle_api_error, normalize_data_source_ids
 
 
 async def codebase_search(
     ctx: Context,
     query: str,
-    data_source_ids: Optional[List[str]] = None,
+    data_source_ids: Optional[Union[str, List[str]]] = None,
     mode: str = "auto",
     include_content: bool = False
 ) -> Dict:
@@ -94,6 +94,9 @@ async def codebase_search(
     """
     context: CodeAliveContext = ctx.request_context.lifespan_context
 
+    # Normalize data source IDs (handles Claude Desktop serialization issues)
+    data_source_ids = normalize_data_source_ids(data_source_ids)
+
     # Validate inputs
     if not query or not query.strip():
         return {"error": "Query cannot be empty. Please provide a search term, function name, or description of the code you're looking for."}
diff --git a/src/utils/__init__.py b/src/utils/__init__.py
@@ -1,10 +1,11 @@
 """Utility functions for CodeAlive MCP server."""
 
 from .response_transformer import transform_search_response_to_xml
-from .errors import handle_api_error, format_data_source_ids
+from .errors import handle_api_error, format_data_source_ids, normalize_data_source_ids
 
 __all__ = [
     'transform_search_response_to_xml',
     'handle_api_error',
-    'format_data_source_ids'
+    'format_data_source_ids',
+    'normalize_data_source_ids'
 ]
diff --git a/src/utils/errors.py b/src/utils/errors.py
@@ -51,6 +51,55 @@ async def handle_api_error(
         return f"Error: {error_msg}. Please check your input parameters and try again."
 
 
+def normalize_data_source_ids(data_sources) -> list:
+    """
+    Normalize data source IDs from various Claude Desktop serialization formats.
+
+    Handles:
+    - Proper arrays (list or tuple): ["id1", "id2"]; {"id": ...} items yield str(id)
+    - JSON-encoded arrays, surrounding whitespace allowed: "[\"id1\", \"id2\"]"
+    - Plain strings (returned unchanged as a single ID): "id1"
+    - None/empty values (returns [])
+
+    Args:
+        data_sources: Data sources in any format from Claude Desktop
+
+    Returns:
+        List of string IDs: ["id1", "id2"]. Items that are neither strings nor
+        dicts with a truthy "id" are skipped; other scalars become [str(value)].
+    """
+    import json
+
+    if not data_sources:
+        return []
+
+    # Handle string inputs (Claude Desktop serialization issue)
+    if isinstance(data_sources, str):
+        # Only the JSON check ignores surrounding whitespace; a plain ID is kept as-is
+        if not data_sources.strip().startswith('['):
+            return [data_sources]
+        try:
+            data_sources = json.loads(data_sources)
+        except json.JSONDecodeError:
+            # Malformed JSON: fall back to treating the raw string as a single ID
+            return [data_sources]
+
+    # Scalars and other unexpected types are stringified into a single ID
+    if not isinstance(data_sources, (list, tuple)):
+        return [str(data_sources)]
+
+    # Already a sequence - extract string IDs
+    result = []
+    for ds in data_sources:
+        if isinstance(ds, str):
+            result.append(ds)
+        elif isinstance(ds, dict) and ds.get("id"):
+            # Coerce so the result is always a list of strings (IDs may be ints)
+            result.append(str(ds["id"]))
+
+    return result
+
+
 def format_data_source_ids(data_sources: Optional[list]) -> list:
     """
     Convert various data source formats to the API's expected format.