Skip to content

Added Audio to FastMCP #1130

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/mcp/server/fastmcp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from importlib.metadata import version

from .server import Context, FastMCP
from .utilities.types import Image
from .utilities.types import Audio, Image

__version__ = version("mcp")
__all__ = ["FastMCP", "Context", "Image"]
__all__ = ["FastMCP", "Context", "Image", "Audio"]
5 changes: 4 additions & 1 deletion src/mcp/server/fastmcp/utilities/func_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from mcp.server.fastmcp.exceptions import InvalidSignature
from mcp.server.fastmcp.utilities.logging import get_logger
from mcp.server.fastmcp.utilities.types import Image
from mcp.server.fastmcp.utilities.types import Audio, Image
from mcp.types import ContentBlock, TextContent

logger = get_logger(__name__)
Expand Down Expand Up @@ -506,6 +506,9 @@ def _convert_to_content(
if isinstance(result, Image):
return [result.to_image_content()]

if isinstance(result, Audio):
return [result.to_audio_content()]

if isinstance(result, list | tuple):
return list(
chain.from_iterable(
Expand Down
49 changes: 48 additions & 1 deletion src/mcp/server/fastmcp/utilities/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import base64
from pathlib import Path

from mcp.types import ImageContent
from mcp.types import AudioContent, ImageContent


class Image:
Expand Down Expand Up @@ -52,3 +52,50 @@ def to_image_content(self) -> ImageContent:
raise ValueError("No image data available")

return ImageContent(type="image", data=data, mimeType=self._mime_type)


class Audio:
"""Helper class for returning audio from tools."""

def __init__(
self,
path: str | Path | None = None,
data: bytes | None = None,
format: str | None = None,
):
Comment on lines +60 to +65
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's use @typing.override.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would like to get some clarity as to what we are trying to override, given that the Audio class doesn't inherit from any class at the moment.

if not bool(path) ^ bool(data):
raise ValueError("Either path or data can be provided")

self.path = Path(path) if path else None
self.data = data
self._format = format
self._mime_type = self._get_mime_type()

def _get_mime_type(self) -> str:
    """Resolve the MIME type for this audio.

    An explicitly supplied format takes precedence. Otherwise the suffix of
    ``self.path`` is looked up in a table of known audio extensions, and
    raw in-memory data (no path) is reported as WAV.
    """
    explicit_format = self._format
    if explicit_format:
        return f"audio/{explicit_format.lower()}"

    if not self.path:
        # No file to inspect: default for raw binary data.
        return "audio/wav"

    mime_by_suffix = {
        ".wav": "audio/wav",
        ".mp3": "audio/mpeg",
        ".ogg": "audio/ogg",
        ".flac": "audio/flac",
        ".aac": "audio/aac",
        ".m4a": "audio/mp4",
    }
    extension = self.path.suffix.lower()
    # Unrecognized extensions fall back to a generic binary type.
    return mime_by_suffix.get(extension, "application/octet-stream")

def to_audio_content(self) -> AudioContent:
    """Build the MCP ``AudioContent`` for this audio.

    The bytes are read from the file at ``self.path`` when a path is set,
    otherwise taken from ``self.data``. They are base64-encoded and paired
    with the MIME type stored in ``self._mime_type``.

    Raises:
        ValueError: If neither a path nor data is available.
    """
    if self.path:
        with open(self.path, "rb") as audio_file:
            raw_bytes = audio_file.read()
    elif self.data is not None:
        raw_bytes = self.data
    else:
        raise ValueError("No audio data available")

    encoded = base64.b64encode(raw_bytes).decode()
    return AudioContent(type="audio", data=encoded, mimeType=self._mime_type)
84 changes: 76 additions & 8 deletions tests/server/fastmcp/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from mcp.server.fastmcp import Context, FastMCP
from mcp.server.fastmcp.prompts.base import Message, UserMessage
from mcp.server.fastmcp.resources import FileResource, FunctionResource
from mcp.server.fastmcp.utilities.types import Image
from mcp.server.fastmcp.utilities.types import Audio, Image
from mcp.server.session import ServerSession
from mcp.shared.exceptions import McpError
from mcp.shared.memory import (
Expand Down Expand Up @@ -195,6 +195,10 @@ def image_tool_fn(path: str) -> Image:
return Image(path)


def audio_tool_fn(path: str) -> Audio:
    """Tool fixture: wrap the file at ``path`` in an ``Audio`` helper."""
    audio = Audio(path)
    return audio


def mixed_content_tool_fn() -> list[ContentBlock]:
return [
TextContent(type="text", text="Hello"),
Expand Down Expand Up @@ -300,6 +304,60 @@ async def test_tool_image_helper(self, tmp_path: Path):
# Check structured content - Image return type should NOT have structured output
assert result.structuredContent is None

@pytest.mark.anyio
async def test_tool_audio_helper(self, tmp_path: Path):
# Create a test audio
audio_path = tmp_path / "test.wav"
audio_path.write_bytes(b"fake wav data")

mcp = FastMCP()
mcp.add_tool(audio_tool_fn)
async with client_session(mcp._mcp_server) as client:
result = await client.call_tool("audio_tool_fn", {"path": str(audio_path)})
assert len(result.content) == 1
content = result.content[0]
assert isinstance(content, AudioContent)
assert content.type == "audio"
assert content.mimeType == "audio/wav"
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you also add something to exercise the suffix-based MIME type detection?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just added a test for the MIME types; do let me know if this is what you are looking for!

# Verify base64 encoding
decoded = base64.b64decode(content.data)
assert decoded == b"fake wav data"
# Check structured content - Audio return type should NOT have structured output
assert result.structuredContent is None

# One case per audio extension, plus an unrecognized extension.
@pytest.mark.parametrize(
"filename,expected_mime_type",
[
("test.wav", "audio/wav"),
("test.mp3", "audio/mpeg"),
("test.ogg", "audio/ogg"),
("test.flac", "audio/flac"),
("test.aac", "audio/aac"),
("test.m4a", "audio/mp4"),
("test.unknown", "application/octet-stream"), # Unknown extension fallback
],
)
@pytest.mark.anyio
async def test_tool_audio_suffix_detection(self, tmp_path: Path, filename: str, expected_mime_type: str):
"""Test that the Audio helper detects the MIME type from the file suffix.

Each parametrized case writes a file named ``filename`` under ``tmp_path``,
calls ``audio_tool_fn`` on it through a client session, and checks that the
returned content carries ``expected_mime_type`` and the original bytes.
"""
mcp = FastMCP()
# Register the module-level tool that returns Audio(path)
mcp.add_tool(audio_tool_fn)

# Create a test audio file with the specific extension
audio_path = tmp_path / filename
audio_path.write_bytes(b"fake audio data")

async with client_session(mcp._mcp_server) as client:
result = await client.call_tool("audio_tool_fn", {"path": str(audio_path)})
# A single Audio return value should yield exactly one content block
assert len(result.content) == 1
content = result.content[0]
assert isinstance(content, AudioContent)
assert content.type == "audio"
# No explicit format is passed, so the MIME type comes from the suffix
assert content.mimeType == expected_mime_type
# Verify base64 encoding
decoded = base64.b64decode(content.data)
assert decoded == b"fake audio data"

@pytest.mark.anyio
async def test_tool_mixed_content(self):
mcp = FastMCP()
Expand Down Expand Up @@ -332,19 +390,24 @@ async def test_tool_mixed_content(self):
assert structured_result[i][key] == value

@pytest.mark.anyio
async def test_tool_mixed_list_with_image(self, tmp_path: Path):
async def test_tool_mixed_list_with_audio_and_image(self, tmp_path: Path):
"""Test that lists containing Image objects and other types are handled
correctly"""
# Create a test image
image_path = tmp_path / "test.png"
image_path.write_bytes(b"test image data")

# Create a test audio
audio_path = tmp_path / "test.wav"
audio_path.write_bytes(b"test audio data")

# TODO(Marcelo): It seems if we add the proper type hint, it generates an invalid JSON schema.
# We need to fix this.
def mixed_list_fn() -> list: # type: ignore
return [ # type: ignore
"text message",
Image(image_path),
Audio(audio_path),
{"key": "value"},
TextContent(type="text", text="direct content"),
]
Expand All @@ -353,7 +416,7 @@ def mixed_list_fn() -> list: # type: ignore
mcp.add_tool(mixed_list_fn) # type: ignore
async with client_session(mcp._mcp_server) as client:
result = await client.call_tool("mixed_list_fn", {})
assert len(result.content) == 4
assert len(result.content) == 5
# Check text conversion
content1 = result.content[0]
assert isinstance(content1, TextContent)
Expand All @@ -363,14 +426,19 @@ def mixed_list_fn() -> list: # type: ignore
assert isinstance(content2, ImageContent)
assert content2.mimeType == "image/png"
assert base64.b64decode(content2.data) == b"test image data"
# Check dict conversion
# Check audio conversion
content3 = result.content[2]
assert isinstance(content3, TextContent)
assert '"key": "value"' in content3.text
# Check direct TextContent
assert isinstance(content3, AudioContent)
assert content3.mimeType == "audio/wav"
assert base64.b64decode(content3.data) == b"test audio data"
# Check dict conversion
content4 = result.content[3]
assert isinstance(content4, TextContent)
assert content4.text == "direct content"
assert '"key": "value"' in content4.text
# Check direct TextContent
content5 = result.content[4]
assert isinstance(content5, TextContent)
assert content5.text == "direct content"
# Check structured content - untyped list with Image objects should NOT have structured output
assert result.structuredContent is None

Expand Down
Loading