Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 67 additions & 4 deletions src/agents/mcp/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@
import abc
import asyncio
import inspect
import sys
from collections.abc import Awaitable
from contextlib import AbstractAsyncContextManager, AsyncExitStack
from datetime import timedelta
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar

if sys.version_info < (3, 11):
from exceptiongroup import BaseExceptionGroup # pyright: ignore[reportMissingImports]


from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
from mcp import ClientSession, StdioServerParameters, Tool as MCPTool, stdio_client
from mcp.client.session import MessageHandlerFnT
Expand All @@ -29,6 +34,33 @@
from ..agent import AgentBase


def _unwrap_exception_group(exc: BaseException) -> BaseException:
"""Extract the most meaningful exception from a BaseExceptionGroup.

When using anyio task groups (as the MCP client library does), HTTP errors
and other exceptions get wrapped in BaseExceptionGroup. This function
extracts the most meaningful exception for clearer error reporting.

Args:
exc: The exception to unwrap, which may be a BaseExceptionGroup.

Returns:
The unwrapped exception, or the original exception if not a group.
"""
if isinstance(exc, BaseExceptionGroup):
# Filter out GeneratorExit and CancelledError as these are cleanup side effects.
meaningful = [
e for e in exc.exceptions if not isinstance(e, (GeneratorExit, asyncio.CancelledError))
]
if meaningful:
# Recursively unwrap in case of nested groups.
return _unwrap_exception_group(meaningful[0])
# If only cleanup exceptions remain, return the first one.
if exc.exceptions:
return _unwrap_exception_group(exc.exceptions[0])
return exc


class MCPServer(abc.ABC):
"""Base class for Model Context Protocol servers."""

Expand Down Expand Up @@ -147,6 +179,8 @@ def __init__(

self.tool_filter = tool_filter

self._cleanup_error: BaseException | None = None

async def _apply_tool_filter(
self,
tools: list[MCPTool],
Expand Down Expand Up @@ -285,9 +319,25 @@ async def connect(self):
server_result = await session.initialize()
self.server_initialize_result = server_result
self.session = session
except Exception as e:
logger.error(f"Error initializing MCP server: {e}")
except BaseException as e:
# Unwrap exception groups from anyio task groups to get the meaningful error.
# The MCP client library uses anyio task groups which wrap exceptions in
# BaseExceptionGroup when errors occur in background tasks (e.g., HTTP errors).
unwrapped = _unwrap_exception_group(e)

# Check if cleanup found a more meaningful error than what we caught.
# This happens when the real error (e.g., HTTPStatusError) is in a background
# task and we only caught CancelledError from the main task being cancelled.
await self.cleanup()
if self._cleanup_error is not None:
error_to_raise = self._cleanup_error
self._cleanup_error = None
logger.error(f"Error initializing MCP server {self.name}: {error_to_raise}")
raise error_to_raise from e

logger.error(f"Error initializing MCP server {self.name}: {unwrapped}")
if unwrapped is not e:
raise unwrapped from e
raise

async def list_tools(
Expand Down Expand Up @@ -349,8 +399,21 @@ async def cleanup(self):
async with self._cleanup_lock:
try:
await self.exit_stack.aclose()
except Exception as e:
logger.error(f"Error cleaning up server: {e}")
self._cleanup_error = None
except BaseException as e:
# During cleanup, exception groups can occur from anyio task group cancellation.
# Unwrap to get the meaningful error for logging and potential re-raising.
unwrapped = _unwrap_exception_group(e)
Comment on lines +403 to +406

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Re-raise interrupts during cleanup

The new except BaseException in cleanup() suppresses KeyboardInterrupt/SystemExit (and any other BaseException) instead of letting them abort the program. If a user hits Ctrl+C while cleanup is running, the interrupt will be logged and swallowed, so shutdown can hang or continue unexpectedly. Before this change these exceptions would propagate. Consider catching BaseExceptionGroup separately and re-raising KeyboardInterrupt/SystemExit (or re-raising any non-group BaseException) to preserve expected termination behavior.

Useful? React with 👍 / 👎.

# CancelledError during cleanup is expected when we're cancelling tasks,
# so log at debug level. Other errors are logged at error level and saved
# so connect() can re-raise them instead of a generic CancelledError.
if isinstance(unwrapped, asyncio.CancelledError):
logger.debug(f"MCP server {self.name} cleanup cancelled (expected)")
self._cleanup_error = None
else:
logger.error(f"Error cleaning up MCP server {self.name}: {unwrapped}")
# Save the meaningful error so it can be re-raised by connect().
self._cleanup_error = unwrapped
finally:
self.session = None

Expand Down