Commit ea42a21

add doc strings for readabilty and constrain types
1 parent c820086

1 file changed (+38, -10 lines)

src/fetch/src/mcp_server_fetch/server.py

Lines changed: 38 additions & 10 deletions
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Optional, Tuple
 from urllib.parse import urlparse, urlunparse
 
 import markdownify
@@ -17,13 +17,21 @@
     INTERNAL_ERROR,
 )
 from protego import Protego
-from pydantic import BaseModel, Field, ValidationError
+from pydantic import BaseModel, Field, AnyUrl, conint
 
 DEFAULT_USER_AGENT_AUTONOMOUS = "ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)"
 DEFAULT_USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)"
 
 
 def extract_content_from_html(html: str) -> str:
+    """Extract and convert HTML content to Markdown format.
+
+    Args:
+        html: Raw HTML content to process
+
+    Returns:
+        Simplified markdown version of the content
+    """
     ret = readabilipy.simple_json.simple_json_from_html_string(
         html, use_readability=True
     )
@@ -36,17 +44,25 @@ def extract_content_from_html(html: str) -> str:
     return content
 
 
-def get_robots_txt_url(url: str) -> str:
+def get_robots_txt_url(url: AnyUrl | str) -> str:
+    """Get the robots.txt URL for a given website URL.
+
+    Args:
+        url: Website URL to get robots.txt for
+
+    Returns:
+        URL of the robots.txt file
+    """
     # Parse the URL into components
-    parsed = urlparse(url)
+    parsed = urlparse(str(url))
 
     # Reconstruct the base URL with just scheme, netloc, and /robots.txt path
     robots_url = urlunparse((parsed.scheme, parsed.netloc, "/robots.txt", "", "", ""))
 
     return robots_url
 
 
-async def check_may_autonomously_fetch_url(url: str, user_agent: str):
+async def check_may_autonomously_fetch_url(url: AnyUrl | str, user_agent: str) -> None:
     """
     Check if the URL can be fetched by the user agent according to the robots.txt file.
     Raises a McpError if not.
@@ -89,7 +105,9 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str):
     )
 
 
-async def fetch_url(url: str, user_agent: str, force_raw: bool = False) -> (str, str):
+async def fetch_url(
+    url: AnyUrl | str, user_agent: str, force_raw: bool = False
+) -> Tuple[str, str]:
     """
     Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
     """
@@ -98,7 +116,7 @@ async def fetch_url(url: str, user_agent: str, force_raw: bool = False) -> (str,
     async with AsyncClient() as client:
         try:
             response = await client.get(
-                url,
+                str(url),
                 follow_redirects=True,
                 headers={"User-Agent": user_agent},
                 timeout=30,
@@ -128,9 +146,13 @@ async def fetch_url(url: str, user_agent: str, force_raw: bool = False) -> (str,
 
 
 class Fetch(BaseModel):
-    url: str = Field(..., description="URL to fetch")
-    max_length: int = Field(5000, description="Maximum number of characters to return.")
-    start_index: int = Field(
+    """Parameters for fetching a URL."""
+
+    url: AnyUrl = Field(..., description="URL to fetch")
+    max_length: conint(gt=0, lt=1000000) = Field(
+        5000, description="Maximum number of characters to return."
+    )
+    start_index: conint(ge=0) = Field(
+        0,
+        description="On return output starting at this character index, useful if a previous fetch was truncated and more context is required.",
+    )
@@ -143,6 +165,12 @@ class Fetch(BaseModel):
 async def serve(
     custom_user_agent: Optional[str] = None, ignore_robots_txt: bool = False
 ) -> None:
+    """Run the fetch MCP server.
+
+    Args:
+        custom_user_agent: Optional custom User-Agent string to use for requests
+        ignore_robots_txt: Whether to ignore robots.txt restrictions
+    """
     server = Server("mcp-fetch")
     user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS
     user_agent_manual = custom_user_agent or DEFAULT_USER_AGENT_MANUAL
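The str(url) wrapping in get_robots_txt_url exists because urllib.parse only operates on plain strings, which is what lets the function accept either an AnyUrl or a str. A minimal sketch of the derivation, lifted from the diffed function (the example URL is made up):

# Minimal sketch of the robots.txt URL derivation shown in the diff;
# str() lets the function accept either an AnyUrl or a plain string.
from urllib.parse import urlparse, urlunparse

def get_robots_txt_url(url) -> str:
    parsed = urlparse(str(url))
    return urlunparse((parsed.scheme, parsed.netloc, "/robots.txt", "", "", ""))

print(get_robots_txt_url("https://example.com/some/page?q=1"))  # https://example.com/robots.txt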

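Since the point of the commit is the constrained types, a hypothetical usage sketch of the new Fetch model shows what AnyUrl and the conint bounds accept and reject. It assumes the package is importable as mcp_server_fetch (inferred from the file path above) and relies only on pydantic's standard ValidationError behavior:

# Hypothetical usage sketch; the import path is assumed from the file path in the diff header.
from pydantic import ValidationError
from mcp_server_fetch.server import Fetch

ok = Fetch(url="https://example.com/page")
print(str(ok.url), ok.max_length, ok.start_index)  # defaults: max_length=5000, start_index=0

try:
    # Violates all three constraints: not a URL, max_length not > 0, start_index negative.
    Fetch(url="not-a-url", max_length=0, start_index=-1)
except ValidationError as exc:
    print(len(exc.errors()), "validation errors")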