Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/fetch/src/mcp_server_fetch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,15 @@ def main():
help="Ignore robots.txt restrictions",
)
parser.add_argument("--proxy-url", type=str, help="Proxy URL to use for requests")
parser.add_argument(
"--max-retries",
type=int,
default=3,
help="Maximum number of retries for transient errors (429/5xx/network). Default: 3",
)

args = parser.parse_args()
asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url))
asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url, args.max_retries))


if __name__ == "__main__":
Expand Down
66 changes: 53 additions & 13 deletions src/fetch/src/mcp_server_fetch/server.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import asyncio
import random
from typing import Annotated, Tuple
from urllib.parse import urlparse, urlunparse

Expand All @@ -23,6 +25,23 @@
DEFAULT_USER_AGENT_AUTONOMOUS = "ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)"
DEFAULT_USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)"

# HTTP status codes that fetch_url treats as transient: a response carrying one
# of these is retried while retry attempts remain.
_RETRYABLE_STATUS_CODES: frozenset[int] = frozenset({429, 500, 502, 503, 504})
# Default number of retries performed after the initial attempt; used as the
# default for the max_retries parameter of fetch_url and serve.
_DEFAULT_MAX_RETRIES = 3


def _retry_wait(retry_after_header: str | None, attempt: int) -> float:
"""Return seconds to wait before the next retry.

Respects Retry-After header when present; otherwise uses full-jitter
exponential backoff capped at 30 seconds.
"""
if retry_after_header is not None:
try:
return max(0.0, float(retry_after_header))
except ValueError:
pass
return random.uniform(0.0, min(30.0, 2.0 ** attempt))


def extract_content_from_html(html: str) -> str:
"""Extract and convert HTML content to Markdown format.
Expand Down Expand Up @@ -109,23 +128,41 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url:


async def fetch_url(
url: str, user_agent: str, force_raw: bool = False, proxy_url: str | None = None
url: str,
user_agent: str,
force_raw: bool = False,
proxy_url: str | None = None,
max_retries: int = _DEFAULT_MAX_RETRIES,
) -> Tuple[str, str]:
"""
Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
"""
from httpx import AsyncClient, HTTPError
from httpx import AsyncClient, HTTPError, TransportError

async with AsyncClient(proxy=proxy_url) as client:
try:
response = await client.get(
url,
follow_redirects=True,
headers={"User-Agent": user_agent},
timeout=30,
)
except HTTPError as e:
raise McpError(ErrorData(code=INTERNAL_ERROR, message=f"Failed to fetch {url}: {e!r}"))
response = None
for attempt in range(max_retries + 1):
try:
response = await client.get(
url,
follow_redirects=True,
headers={"User-Agent": user_agent},
timeout=30,
)
except TransportError as e:
if attempt < max_retries:
await asyncio.sleep(random.uniform(0.0, min(30.0, 2.0 ** attempt)))
continue
raise McpError(ErrorData(code=INTERNAL_ERROR, message=f"Failed to fetch {url}: {e!r}"))
except HTTPError as e:
raise McpError(ErrorData(code=INTERNAL_ERROR, message=f"Failed to fetch {url}: {e!r}"))

if response.status_code in _RETRYABLE_STATUS_CODES and attempt < max_retries:
await asyncio.sleep(_retry_wait(response.headers.get("Retry-After"), attempt))
continue
break

assert response is not None
if response.status_code >= 400:
raise McpError(ErrorData(
code=INTERNAL_ERROR,
Expand Down Expand Up @@ -182,13 +219,15 @@ async def serve(
custom_user_agent: str | None = None,
ignore_robots_txt: bool = False,
proxy_url: str | None = None,
max_retries: int = _DEFAULT_MAX_RETRIES,
) -> None:
"""Run the fetch MCP server.

Args:
custom_user_agent: Optional custom User-Agent string to use for requests
ignore_robots_txt: Whether to ignore robots.txt restrictions
proxy_url: Optional proxy URL to use for requests
max_retries: Number of retries for transient errors (429/5xx/network)
"""
server = Server("mcp-fetch")
user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS
Expand Down Expand Up @@ -235,7 +274,8 @@ async def call_tool(name, arguments: dict) -> list[TextContent]:
await check_may_autonomously_fetch_url(url, user_agent_autonomous, proxy_url)

content, prefix = await fetch_url(
url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url
url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url,
max_retries=max_retries,
)
original_length = len(content)
if args.start_index >= original_length:
Expand All @@ -262,7 +302,7 @@ async def get_prompt(name: str, arguments: dict | None) -> GetPromptResult:
url = arguments["url"]

try:
content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url)
content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url, max_retries=max_retries)
# TODO: after SDK bug is addressed, don't catch the exception
except McpError as e:
return GetPromptResult(
Expand Down
190 changes: 186 additions & 4 deletions src/fetch/tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
get_robots_txt_url,
check_may_autonomously_fetch_url,
fetch_url,
_retry_wait,
DEFAULT_USER_AGENT_AUTONOMOUS,
)

Expand Down Expand Up @@ -268,9 +269,10 @@ async def test_fetch_json_returns_raw(self):

@pytest.mark.asyncio
async def test_fetch_404_raises_error(self):
"""Test that 404 response raises McpError."""
"""Test that 404 response raises McpError immediately without retry."""
mock_response = MagicMock()
mock_response.status_code = 404
mock_response.headers = {}

with patch("httpx.AsyncClient") as mock_client_class:
mock_client = AsyncMock()
Expand All @@ -281,14 +283,18 @@ async def test_fetch_404_raises_error(self):
with pytest.raises(McpError):
await fetch_url(
"https://example.com/notfound",
DEFAULT_USER_AGENT_AUTONOMOUS
DEFAULT_USER_AGENT_AUTONOMOUS,
)

# 404 is not retryable — should only be called once
assert mock_client.get.call_count == 1

@pytest.mark.asyncio
async def test_fetch_500_raises_error(self):
"""Test that 500 response raises McpError."""
"""Test that 500 response raises McpError (no retries)."""
mock_response = MagicMock()
mock_response.status_code = 500
mock_response.headers = {}

with patch("httpx.AsyncClient") as mock_client_class:
mock_client = AsyncMock()
Expand All @@ -299,7 +305,8 @@ async def test_fetch_500_raises_error(self):
with pytest.raises(McpError):
await fetch_url(
"https://example.com/error",
DEFAULT_USER_AGENT_AUTONOMOUS
DEFAULT_USER_AGENT_AUTONOMOUS,
max_retries=0,
)

@pytest.mark.asyncio
Expand All @@ -324,3 +331,178 @@ async def test_fetch_with_proxy(self):

# Verify AsyncClient was called with proxy
mock_client_class.assert_called_once_with(proxy="http://proxy.example.com:8080")


class TestRetryWait:
    """Unit tests for the delay values produced by _retry_wait."""

    def test_respects_retry_after_header(self):
        """A plain numeric header is returned as that many seconds."""
        delay = _retry_wait("5", 0)
        assert delay == 5.0

    def test_retry_after_zero(self):
        """A header of zero means retry without waiting."""
        delay = _retry_wait("0", 0)
        assert delay == 0.0

    def test_retry_after_negative_clamped_to_zero(self):
        """Negative header values never produce a negative delay."""
        delay = _retry_wait("-1", 0)
        assert delay == 0.0

    def test_invalid_retry_after_falls_back_to_backoff(self):
        """A non-numeric header stays within the attempt-0 window."""
        # For attempt 0 the backoff window is at most 2 ** 0 = 1 second.
        delay = _retry_wait("Wed, 21 Oct 2015 07:28:00 GMT", 0)
        assert 0.0 <= delay <= 1.0

    def test_no_header_uses_exponential_backoff(self):
        """Without a header the delay is bounded by 2 ** attempt."""
        delay = _retry_wait(None, 2)
        assert 0.0 <= delay <= 4.0

    def test_backoff_capped_at_30(self):
        """Very large attempt numbers still respect the 30 second cap."""
        delay = _retry_wait(None, 100)
        assert delay <= 30.0


class TestRetryBehavior:
    """Exercise the retry loop of fetch_url against a mocked httpx client."""

    @staticmethod
    def _make_response(status_code, headers, text=None):
        """Build a mock response; ``text`` is only set when provided."""
        response = MagicMock()
        response.status_code = status_code
        if text is not None:
            response.text = text
        response.headers = headers
        return response

    @staticmethod
    def _wire_client(client_cls, get_mock):
        """Attach a mock client with ``get_mock`` to the patched AsyncClient."""
        client = AsyncMock()
        client.get = get_mock
        client_cls.return_value.__aenter__ = AsyncMock(return_value=client)
        client_cls.return_value.__aexit__ = AsyncMock(return_value=None)
        return client

    @pytest.mark.asyncio
    async def test_retries_on_503_then_succeeds(self):
        """A 503 followed by a 200 returns the 200 body after one sleep."""
        unavailable = self._make_response(503, {})
        success = self._make_response(
            200, {"content-type": "application/json"}, text='{"ok": true}'
        )

        with patch("httpx.AsyncClient") as client_cls, patch(
            "asyncio.sleep", new_callable=AsyncMock
        ) as sleep_mock:
            client = self._wire_client(
                client_cls, AsyncMock(side_effect=[unavailable, success])
            )
            body, _ = await fetch_url(
                "https://example.com/api",
                DEFAULT_USER_AGENT_AUTONOMOUS,
            )

        assert client.get.call_count == 2
        sleep_mock.assert_called_once()
        assert body == '{"ok": true}'

    @pytest.mark.asyncio
    async def test_retries_on_429_respects_retry_after(self):
        """A 429 with Retry-After sleeps for exactly the advertised delay."""
        throttled = self._make_response(429, {"Retry-After": "2"})
        success = self._make_response(
            200, {"content-type": "text/plain"}, text="done"
        )

        with patch("httpx.AsyncClient") as client_cls, patch(
            "asyncio.sleep", new_callable=AsyncMock
        ) as sleep_mock:
            self._wire_client(
                client_cls, AsyncMock(side_effect=[throttled, success])
            )
            await fetch_url("https://example.com/api", DEFAULT_USER_AGENT_AUTONOMOUS)

        sleep_mock.assert_called_once_with(2.0)

    @pytest.mark.asyncio
    async def test_raises_after_exhausting_retries(self):
        """A persistent 503 raises McpError once the retry budget is spent."""
        unavailable = self._make_response(503, {})

        with patch("httpx.AsyncClient") as client_cls, patch(
            "asyncio.sleep", new_callable=AsyncMock
        ):
            client = self._wire_client(
                client_cls, AsyncMock(return_value=unavailable)
            )
            with pytest.raises(McpError):
                await fetch_url(
                    "https://example.com/api",
                    DEFAULT_USER_AGENT_AUTONOMOUS,
                    max_retries=2,
                )

        # One initial attempt plus two retries.
        assert client.get.call_count == 3

    @pytest.mark.asyncio
    async def test_no_retry_on_404(self):
        """A 404 is outside the retryable set: one request, no sleep."""
        missing = self._make_response(404, {})

        with patch("httpx.AsyncClient") as client_cls, patch(
            "asyncio.sleep", new_callable=AsyncMock
        ) as sleep_mock:
            client = self._wire_client(client_cls, AsyncMock(return_value=missing))
            with pytest.raises(McpError):
                await fetch_url(
                    "https://example.com/missing", DEFAULT_USER_AGENT_AUTONOMOUS
                )

        assert client.get.call_count == 1
        sleep_mock.assert_not_called()

    @pytest.mark.asyncio
    async def test_retries_on_transient_network_error(self):
        """A TimeoutException is retried and the second attempt succeeds."""
        import httpx

        success = self._make_response(
            200, {"content-type": "text/plain"}, text="hello"
        )
        outcomes = [httpx.TimeoutException("timed out"), success]

        with patch("httpx.AsyncClient") as client_cls, patch(
            "asyncio.sleep", new_callable=AsyncMock
        ):
            client = self._wire_client(client_cls, AsyncMock(side_effect=outcomes))
            body, _ = await fetch_url(
                "https://example.com/slow",
                DEFAULT_USER_AGENT_AUTONOMOUS,
            )

        assert client.get.call_count == 2
        assert body == "hello"

    @pytest.mark.asyncio
    async def test_network_error_raises_after_exhausting_retries(self):
        """Repeated connection failures raise McpError after all retries."""
        import httpx

        failure = httpx.ConnectError("connection refused")

        with patch("httpx.AsyncClient") as client_cls, patch(
            "asyncio.sleep", new_callable=AsyncMock
        ):
            client = self._wire_client(client_cls, AsyncMock(side_effect=failure))
            with pytest.raises(McpError):
                await fetch_url(
                    "https://example.com/down",
                    DEFAULT_USER_AGENT_AUTONOMOUS,
                    max_retries=1,
                )

        assert client.get.call_count == 2
Loading