Skip to content

Commit ac3c61d

Browse files
Merge pull request modelcontextprotocol#144 from SkywaveTM/main
fix(fetch): properly handles robots.txt
2 parents a807125 + 1778cc7 commit ac3c61d

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

src/fetch/src/mcp_server_fetch/server.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,9 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
74 74
async with AsyncClient() as client:
75 75
try:
76 76
response = await client.get(
77-
robot_txt_url, headers={"User-Agent": user_agent}
77+
robot_txt_url,
78+
follow_redirects=True,
79+
headers={"User-Agent": user_agent},
78 80
)
79 81
except HTTPError:
80 82
raise McpError(
@@ -93,7 +95,7 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
93 95
line for line in robot_txt.splitlines() if not line.strip().startswith("#")
94 96
)
95 97
robot_parser = Protego.parse(processed_robot_txt)
96-
if not robot_parser.can_fetch(url, user_agent):
98+
if not robot_parser.can_fetch(str(url), user_agent):
97 99
raise McpError(
98 100
INTERNAL_ERROR,
99 101
f"The sites robots.txt ({robot_txt_url}), specifies that autonomous fetching of this page is not allowed, "

0 commit comments

Comments
 (0)