diff --git a/src/mcp/server/fastmcp/__init__.py b/src/mcp/server/fastmcp/__init__.py
index 84b052078..f8f9c1c4c 100644
--- a/src/mcp/server/fastmcp/__init__.py
+++ b/src/mcp/server/fastmcp/__init__.py
@@ -3,7 +3,7 @@
 from importlib.metadata import version
 
 from .server import Context, FastMCP
-from .utilities.types import Image
+from .utilities.types import Audio, Image
 
 __version__ = version("mcp")
-__all__ = ["FastMCP", "Context", "Image"]
+__all__ = ["FastMCP", "Context", "Image", "Audio"]
diff --git a/src/mcp/server/fastmcp/utilities/func_metadata.py b/src/mcp/server/fastmcp/utilities/func_metadata.py
index 70be8796d..a4cb8ac5b 100644
--- a/src/mcp/server/fastmcp/utilities/func_metadata.py
+++ b/src/mcp/server/fastmcp/utilities/func_metadata.py
@@ -21,7 +21,7 @@
 
 from mcp.server.fastmcp.exceptions import InvalidSignature
 from mcp.server.fastmcp.utilities.logging import get_logger
-from mcp.server.fastmcp.utilities.types import Image
+from mcp.server.fastmcp.utilities.types import Audio, Image
 from mcp.types import ContentBlock, TextContent
 
 logger = get_logger(__name__)
@@ -506,6 +506,9 @@ def _convert_to_content(
     if isinstance(result, Image):
         return [result.to_image_content()]
 
+    if isinstance(result, Audio):
+        return [result.to_audio_content()]
+
     if isinstance(result, list | tuple):
         return list(
             chain.from_iterable(
diff --git a/src/mcp/server/fastmcp/utilities/types.py b/src/mcp/server/fastmcp/utilities/types.py
index ccaa3d69a..1be6f8274 100644
--- a/src/mcp/server/fastmcp/utilities/types.py
+++ b/src/mcp/server/fastmcp/utilities/types.py
@@ -3,7 +3,7 @@
 import base64
 from pathlib import Path
 
-from mcp.types import ImageContent
+from mcp.types import AudioContent, ImageContent
 
 
 class Image:
@@ -52,3 +52,55 @@ def to_image_content(self) -> ImageContent:
             raise ValueError("No image data available")
 
         return ImageContent(type="image", data=data, mimeType=self._mime_type)
+
+
+class Audio:
+    """Helper class for returning audio from tools.
+
+    Exactly one of ``path`` or ``data`` must be given. ``format`` (e.g. "wav"),
+    when set, takes precedence over the MIME type guessed from the file suffix.
+    """
+
+    def __init__(
+        self,
+        path: str | Path | None = None,
+        data: bytes | None = None,
+        format: str | None = None,
+    ):
+        # Compare data against None so that empty bytes still count as provided.
+        if bool(path) == (data is not None):
+            raise ValueError("Exactly one of path or data must be provided")
+
+        self.path = Path(path) if path else None
+        self.data = data
+        self._format = format
+        self._mime_type = self._get_mime_type()
+
+    def _get_mime_type(self) -> str:
+        """Get MIME type from format or guess from file extension."""
+        if self._format:
+            return f"audio/{self._format.lower()}"
+
+        if self.path:
+            suffix = self.path.suffix.lower()
+            return {
+                ".wav": "audio/wav",
+                ".mp3": "audio/mpeg",
+                ".ogg": "audio/ogg",
+                ".flac": "audio/flac",
+                ".aac": "audio/aac",
+                ".m4a": "audio/mp4",
+            }.get(suffix, "application/octet-stream")
+        return "audio/wav"  # default for raw binary data
+
+    def to_audio_content(self) -> AudioContent:
+        """Convert to MCP AudioContent."""
+        if self.path:
+            with open(self.path, "rb") as f:
+                data = base64.b64encode(f.read()).decode()
+        elif self.data is not None:
+            data = base64.b64encode(self.data).decode()
+        else:
+            raise ValueError("No audio data available")
+
+        return AudioContent(type="audio", data=data, mimeType=self._mime_type)
diff --git a/tests/server/fastmcp/test_server.py b/tests/server/fastmcp/test_server.py
index a4e72d1e9..3f921b588 100644
--- a/tests/server/fastmcp/test_server.py
+++ b/tests/server/fastmcp/test_server.py
@@ -10,7 +10,7 @@
 from mcp.server.fastmcp import Context, FastMCP
 from mcp.server.fastmcp.prompts.base import Message, UserMessage
 from mcp.server.fastmcp.resources import FileResource, FunctionResource
-from mcp.server.fastmcp.utilities.types import Image
+from mcp.server.fastmcp.utilities.types import Audio, Image
 from mcp.server.session import ServerSession
 from mcp.shared.exceptions import McpError
 from mcp.shared.memory import (
@@ -195,6 +195,10 @@ def image_tool_fn(path: str) -> Image:
     return Image(path)
 
 
+def audio_tool_fn(path: str) -> Audio:
+    return Audio(path)
+
+
 def mixed_content_tool_fn() -> list[ContentBlock]:
     return [
         TextContent(type="text", text="Hello"),
@@ -300,6 +304,60 @@ async def test_tool_image_helper(self, tmp_path: Path):
             # Check structured content - Image return type should NOT have structured output
             assert result.structuredContent is None
 
+    @pytest.mark.anyio
+    async def test_tool_audio_helper(self, tmp_path: Path):
+        # Create a test audio
+        audio_path = tmp_path / "test.wav"
+        audio_path.write_bytes(b"fake wav data")
+
+        mcp = FastMCP()
+        mcp.add_tool(audio_tool_fn)
+        async with client_session(mcp._mcp_server) as client:
+            result = await client.call_tool("audio_tool_fn", {"path": str(audio_path)})
+            assert len(result.content) == 1
+            content = result.content[0]
+            assert isinstance(content, AudioContent)
+            assert content.type == "audio"
+            assert content.mimeType == "audio/wav"
+            # Verify base64 encoding
+            decoded = base64.b64decode(content.data)
+            assert decoded == b"fake wav data"
+            # Check structured content - Audio return type should NOT have structured output
+            assert result.structuredContent is None
+
+    @pytest.mark.parametrize(
+        "filename,expected_mime_type",
+        [
+            ("test.wav", "audio/wav"),
+            ("test.mp3", "audio/mpeg"),
+            ("test.ogg", "audio/ogg"),
+            ("test.flac", "audio/flac"),
+            ("test.aac", "audio/aac"),
+            ("test.m4a", "audio/mp4"),
+            ("test.unknown", "application/octet-stream"),  # Unknown extension fallback
+        ],
+    )
+    @pytest.mark.anyio
+    async def test_tool_audio_suffix_detection(self, tmp_path: Path, filename: str, expected_mime_type: str):
+        """Test that Audio helper correctly detects MIME types from file suffixes"""
+        mcp = FastMCP()
+        mcp.add_tool(audio_tool_fn)
+
+        # Create a test audio file with the specific extension
+        audio_path = tmp_path / filename
+        audio_path.write_bytes(b"fake audio data")
+
+        async with client_session(mcp._mcp_server) as client:
+            result = await client.call_tool("audio_tool_fn", {"path": str(audio_path)})
+            assert len(result.content) == 1
+            content = result.content[0]
+            assert isinstance(content, AudioContent)
+            assert content.type == "audio"
+            assert content.mimeType == expected_mime_type
+            # Verify base64 encoding
+            decoded = base64.b64decode(content.data)
+            assert decoded == b"fake audio data"
+
     @pytest.mark.anyio
     async def test_tool_mixed_content(self):
         mcp = FastMCP()
@@ -332,19 +390,24 @@ async def test_tool_mixed_content(self):
                     assert structured_result[i][key] == value
 
     @pytest.mark.anyio
-    async def test_tool_mixed_list_with_image(self, tmp_path: Path):
+    async def test_tool_mixed_list_with_audio_and_image(self, tmp_path: Path):
         """Test that lists containing Image objects and other types are handled
         correctly"""
         # Create a test image
         image_path = tmp_path / "test.png"
         image_path.write_bytes(b"test image data")
 
+        # Create a test audio
+        audio_path = tmp_path / "test.wav"
+        audio_path.write_bytes(b"test audio data")
+
         # TODO(Marcelo): It seems if we add the proper type hint, it generates an invalid JSON schema.
         # We need to fix this.
         def mixed_list_fn() -> list:  # type: ignore
             return [  # type: ignore
                 "text message",
                 Image(image_path),
+                Audio(audio_path),
                 {"key": "value"},
                 TextContent(type="text", text="direct content"),
             ]
@@ -353,7 +416,7 @@ def mixed_list_fn() -> list:  # type: ignore
         mcp.add_tool(mixed_list_fn)  # type: ignore
         async with client_session(mcp._mcp_server) as client:
             result = await client.call_tool("mixed_list_fn", {})
-            assert len(result.content) == 4
+            assert len(result.content) == 5
             # Check text conversion
             content1 = result.content[0]
             assert isinstance(content1, TextContent)
@@ -363,14 +426,19 @@ def mixed_list_fn() -> list:  # type: ignore
             assert isinstance(content2, ImageContent)
             assert content2.mimeType == "image/png"
             assert base64.b64decode(content2.data) == b"test image data"
-            # Check dict conversion
+            # Check audio conversion
             content3 = result.content[2]
-            assert isinstance(content3, TextContent)
-            assert '"key": "value"' in content3.text
-            # Check direct TextContent
+            assert isinstance(content3, AudioContent)
+            assert content3.mimeType == "audio/wav"
+            assert base64.b64decode(content3.data) == b"test audio data"
+            # Check dict conversion
             content4 = result.content[3]
             assert isinstance(content4, TextContent)
-            assert content4.text == "direct content"
+            assert '"key": "value"' in content4.text
+            # Check direct TextContent
+            content5 = result.content[4]
+            assert isinstance(content5, TextContent)
+            assert content5.text == "direct content"
             # Check structured content - untyped list with Image objects should NOT have structured output
             assert result.structuredContent is None
 