Skip to content

Commit 37622d3

Browse files
committed
add handling of non-html pages
1 parent 467330d commit 37622d3

File tree

1 file changed

+9
-4
lines changed

1 file changed

+9
-4
lines changed

src/fetch/src/mcp_server_fetch/server.py

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -23,11 +23,11 @@
2323
DEFAULT_USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)"
2424

2525

26-
def extract_content(html: str) -> str:
26+
def extract_content_from_html(html: str) -> str:
2727
ret = readabilipy.simple_json.simple_json_from_html_string(
2828
html, use_readability=True
2929
)
30-
if not ret["plain_content"]:
30+
if not ret["content"]:
3131
return "<error>Page failed to be simplified from HTML</error>"
3232
content = markdownify.markdownify(
3333
ret["content"],
@@ -105,13 +105,18 @@ async def fetch_url(url: str, user_agent: str) -> str:
105105
f"Failed to fetch {url} - status code {response.status_code}",
106106
)
107107

108-
page_html = response.text
108+
page_raw = response.text
109109

110-
return extract_content(page_html)
110+
content_type = response.headers.get("content-type", "")
111+
if "<html" in page_raw[:100] or "text/html" in content_type or not content_type:
112+
return extract_content_from_html(page_raw)
113+
114+
return f"Content type {content_type} cannot be simplified to markdown, but here is the raw content:\n{page_raw}"
111115

112116

113117
class Fetch(BaseModel):
114118
url: str = Field(..., description="URL to fetch")
119+
start_index: int = Field(0, description="On return output starting at this character index, useful if a previous fetch was truncated and more context is required.")
115120

116121

117122
async def serve(

0 commit comments

Comments
 (0)