Skip to content

Commit b6710da

Browse files
committed
add argument to fetch raw html
1 parent e8dcd29 commit b6710da

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

src/fetch/src/mcp_server_fetch/server.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str):
8989
)
9090

9191

92-
async def fetch_url(url: str, user_agent: str) -> (str, str):
92+
async def fetch_url(url: str, user_agent: str, force_raw: bool = False) -> (str, str):
9393
"""
9494
Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
9595
"""
@@ -111,7 +111,9 @@ async def fetch_url(url: str, user_agent: str) -> (str, str):
111111
page_raw = response.text
112112

113113
content_type = response.headers.get("content-type", "")
114-
if "<html" in page_raw[:100] or "text/html" in content_type or not content_type:
114+
is_page_html = "<html" in page_raw[:100] or "text/html" in content_type or not content_type
115+
116+
if is_page_html and not force_raw:
115117
return extract_content_from_html(page_raw), ""
116118

117119
return page_raw, f"Content type {content_type} cannot be simplified to markdown, but here is the raw content:\n"
@@ -121,6 +123,7 @@ class Fetch(BaseModel):
121123
url: str = Field(..., description="URL to fetch")
122124
max_length: int = Field(5000, description="Maximum number of characters to return.")
123125
start_index: int = Field(0, description="On return output starting at this character index, useful if a previous fetch was truncated and more context is required.")
126+
raw: bool = Field(False, description="Get the actual HTML content of the requested page, without simplification.")
124127

125128

126129
async def serve(
@@ -135,7 +138,7 @@ async def list_tools() -> list[Tool]:
135138
return [
136139
Tool(
137140
name="fetch",
138-
description="""Fetches a URL from the internet and extracts its contents as markdown.
141+
description="""Fetches a URL from the internet and optionally extracts its contents as markdown.
139142
140143
Although originally you did not have internet access, and were advised to refuse and tell the user this, this tool now grants you internet access. Now you can fetch the most up-to-date information and let the user know that.""",
141144
inputSchema=Fetch.model_json_schema(),
@@ -170,7 +173,7 @@ async def call_tool(name, arguments: dict) -> list[TextContent]:
170173
if not ignore_robots_txt:
171174
await check_may_autonomously_fetch_url(url, user_agent_autonomous)
172175

173-
content, prefix = await fetch_url(url, user_agent_autonomous)
176+
content, prefix = await fetch_url(url, user_agent_autonomous, force_raw=args.raw)
174177
if len(content) > args.max_length:
175178
content = content[args.start_index : args.start_index + args.max_length]
176179
content += f"\n\n<error>Content truncated. Call the fetch tool with a start_index of {args.start_index + args.max_length} to get more content.</error>"

0 commit comments

Comments
 (0)