feat: Added Audio to FastMCP

dragonier23 · dragonier23 · commit 992de1025536 · 2025-07-12T15:42:49.000+08:00
diff --git a/src/mcp/server/fastmcp/__init__.py b/src/mcp/server/fastmcp/__init__.py
@@ -3,7 +3,7 @@
 from importlib.metadata import version
 
 from .server import Context, FastMCP
-from .utilities.types import Image
+from .utilities.types import Audio, Image
 
 __version__ = version("mcp")
-__all__ = ["FastMCP", "Context", "Image"]
+__all__ = ["FastMCP", "Context", "Image", "Audio"]
diff --git a/src/mcp/server/fastmcp/utilities/func_metadata.py b/src/mcp/server/fastmcp/utilities/func_metadata.py
@@ -21,7 +21,7 @@
 
 from mcp.server.fastmcp.exceptions import InvalidSignature
 from mcp.server.fastmcp.utilities.logging import get_logger
-from mcp.server.fastmcp.utilities.types import Image
+from mcp.server.fastmcp.utilities.types import Audio, Image
 from mcp.types import ContentBlock, TextContent
 
 logger = get_logger(__name__)
@@ -480,6 +480,9 @@ def _convert_to_content(
     if isinstance(result, Image):
         return [result.to_image_content()]
 
+    if isinstance(result, Audio):
+        return [result.to_audio_content()]
+
     if isinstance(result, list | tuple):
         return list(
             chain.from_iterable(
diff --git a/src/mcp/server/fastmcp/utilities/types.py b/src/mcp/server/fastmcp/utilities/types.py
@@ -3,7 +3,7 @@
 import base64
 from pathlib import Path
 
-from mcp.types import ImageContent
+from mcp.types import AudioContent, ImageContent
 
 
 class Image:
@@ -52,3 +52,52 @@ def to_image_content(self) -> ImageContent:
             raise ValueError("No image data available")
 
         return ImageContent(type="image", data=data, mimeType=self._mime_type)
+
+
+class Audio:
+    """Helper class for returning audio from tools."""
+
+    def __init__(
+        self,
+        path: str | Path | None = None,
+        data: bytes | None = None,
+        format: str | None = None,
+    ):
+        if path is None and data is None:
+            raise ValueError("Either path or data must be provided")
+        if path is not None and data is not None:
+            raise ValueError("Only one of path or data can be provided")
+
+        self.path = Path(path) if path else None
+        self.data = data
+        self._format = format
+        self._mime_type = self._get_mime_type()
+
+    def _get_mime_type(self) -> str:
+        """Get MIME type from format or guess from file extension."""
+        if self._format:
+            return f"audio/{self._format.lower()}"
+
+        if self.path:
+            suffix = self.path.suffix.lower()
+            return {
+                ".wav": "audio/wav",
+                ".mp3": "audio/mpeg",
+                ".ogg": "audio/ogg",
+                ".flac": "audio/flac",
+                ".aac": "audio/aac",
+                ".m4a": "audio/mp4",
+            }.get(suffix, "application/octet-stream")
+        return "audio/wav"  # default for raw binary data
+
+    def to_audio_content(self) -> AudioContent:
+        """Convert to MCP AudioContent."""
+        if self.path:
+            with open(self.path, "rb") as f:
+                data = base64.b64encode(f.read()).decode()
+        elif self.data is not None:
+            data = base64.b64encode(self.data).decode()
+        else:
+            raise ValueError("No audio data available")
+
+        return AudioContent(type="audio", data=data, mimeType=self._mime_type)
diff --git a/tests/server/fastmcp/test_server.py b/tests/server/fastmcp/test_server.py
@@ -10,7 +10,7 @@
 from mcp.server.fastmcp import Context, FastMCP
 from mcp.server.fastmcp.prompts.base import Message, UserMessage
 from mcp.server.fastmcp.resources import FileResource, FunctionResource
-from mcp.server.fastmcp.utilities.types import Image
+from mcp.server.fastmcp.utilities.types import Audio, Image
 from mcp.shared.exceptions import McpError
 from mcp.shared.memory import (
     create_connected_server_and_client_session as client_session,
@@ -194,6 +194,10 @@ def image_tool_fn(path: str) -> Image:
     return Image(path)
 
 
+def audio_tool_fn(path: str) -> Audio:
+    return Audio(path)
+
+
 def mixed_content_tool_fn() -> list[ContentBlock]:
     return [
         TextContent(type="text", text="Hello"),
@@ -299,6 +303,27 @@ async def test_tool_image_helper(self, tmp_path: Path):
             # Check structured content - Image return type should NOT have structured output
             assert result.structuredContent is None
 
+    @pytest.mark.anyio
+    async def test_tool_audio_helper(self, tmp_path: Path):
+        # Create a test audio file
+        audio_path = tmp_path / "test.wav"
+        audio_path.write_bytes(b"fake wav data")
+
+        mcp = FastMCP()
+        mcp.add_tool(audio_tool_fn)
+        async with client_session(mcp._mcp_server) as client:
+            result = await client.call_tool("audio_tool_fn", {"path": str(audio_path)})
+            assert len(result.content) == 1
+            content = result.content[0]
+            assert isinstance(content, AudioContent)
+            assert content.type == "audio"
+            assert content.mimeType == "audio/wav"
+            # Verify base64 encoding
+            decoded = base64.b64decode(content.data)
+            assert decoded == b"fake wav data"
+            # Check structured content - Audio return type should NOT have structured output
+            assert result.structuredContent is None
+
     @pytest.mark.anyio
     async def test_tool_mixed_content(self):
         mcp = FastMCP()
@@ -371,6 +396,47 @@ def mixed_list_fn() -> list:
             # Check structured content - untyped list with Image objects should NOT have structured output
             assert result.structuredContent is None
 
+    @pytest.mark.anyio
+    async def test_tool_mixed_list_with_audio(self, tmp_path: Path):
+        """Test that lists containing Audio objects and other types are handled
+        correctly"""
+        # Create a test audio file
+        audio_path = tmp_path / "test.wav"
+        audio_path.write_bytes(b"test audio data")
+
+        def mixed_list_fn() -> list:
+            return [
+                "text message",
+                Audio(audio_path),
+                {"key": "value"},
+                TextContent(type="text", text="direct content"),
+            ]
+
+        mcp = FastMCP()
+        mcp.add_tool(mixed_list_fn)
+        async with client_session(mcp._mcp_server) as client:
+            result = await client.call_tool("mixed_list_fn", {})
+            assert len(result.content) == 4
+            # Check text conversion
+            content1 = result.content[0]
+            assert isinstance(content1, TextContent)
+            assert content1.text == "text message"
+            # Check audio conversion
+            content2 = result.content[1]
+            assert isinstance(content2, AudioContent)
+            assert content2.mimeType == "audio/wav"
+            assert base64.b64decode(content2.data) == b"test audio data"
+            # Check dict conversion
+            content3 = result.content[2]
+            assert isinstance(content3, TextContent)
+            assert '"key": "value"' in content3.text
+            # Check direct TextContent
+            content4 = result.content[3]
+            assert isinstance(content4, TextContent)
+            assert content4.text == "direct content"
+            # Check structured content - untyped list with Audio objects should NOT have structured output
+            assert result.structuredContent is None
+
     @pytest.mark.anyio
     async def test_tool_structured_output_basemodel(self):
         """Test tool with structured output returning BaseModel"""