@@ -63,7 +63,7 @@ def get_robots_txt_url(url: str) -> str:
     return robots_url


-async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
+async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url: str | None = None) -> None:
     """
     Check if the URL can be fetched by the user agent according to the robots.txt file.
     Raises a McpError if not.
@@ -72,7 +72,7 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:

    robot_txt_url = get_robots_txt_url(url)

-    async with AsyncClient() as client:
+    async with AsyncClient(proxies=proxy_url) as client:
         try:
             response = await client.get(
                 robot_txt_url,
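The two hunks above give the robots.txt gate an optional proxy: httpx sends every request through the given proxy when one is set and connects directly when it is None, so existing callers are unaffected. A minimal sketch of that behavior, assuming a placeholder proxy address, target URL, and user agent, and using the `proxies` keyword that this diff uses (httpx releases before 0.26):

import asyncio

from httpx import AsyncClient

async def demo(proxy_url: str | None = None) -> None:
    # proxies=None means a direct connection, so omitting the new
    # parameter preserves the old behavior.
    async with AsyncClient(proxies=proxy_url) as client:
        response = await client.get(
            "https://example.com/robots.txt",  # placeholder target
            headers={"User-Agent": "ModelContextProtocol/1.0 (Autonomous)"},
            follow_redirects=True,
        )
        print(response.status_code)

asyncio.run(demo("http://localhost:8080"))  # placeholder proxy address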
@@ -109,14 +109,14 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:


 async def fetch_url(
-    url: str, user_agent: str, force_raw: bool = False
+    url: str, user_agent: str, force_raw: bool = False, proxy_url: str | None = None
 ) -> Tuple[str, str]:
     """
     Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
     """
     from httpx import AsyncClient, HTTPError

-    async with AsyncClient() as client:
+    async with AsyncClient(proxies=proxy_url) as client:
         try:
             response = await client.get(
                 url,
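fetch_url gets the same treatment, so content fetches and the robots.txt check can share one proxy. A hypothetical direct invocation (the import path, URL, user agent, and proxy are all placeholders; a socks5 proxy additionally assumes httpx was installed with its socks extra):

import asyncio

from mcp_server_fetch.server import fetch_url  # import path is an assumption

async def demo() -> None:
    content, prefix = await fetch_url(
        "https://example.com",                        # placeholder URL
        "ModelContextProtocol/1.0 (user-specified)",  # placeholder user agent
        force_raw=False,
        proxy_url="socks5://127.0.0.1:1080",  # placeholder; needs httpx[socks]
    )
    print(prefix)
    print(content[:200])

asyncio.run(demo())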
@@ -179,13 +179,16 @@ class Fetch(BaseModel):


 async def serve(
-    custom_user_agent: str | None = None, ignore_robots_txt: bool = False
+    custom_user_agent: str | None = None,
+    ignore_robots_txt: bool = False,
+    proxy_url: str | None = None,
 ) -> None:
     """Run the fetch MCP server.

     Args:
         custom_user_agent: Optional custom User-Agent string to use for requests
         ignore_robots_txt: Whether to ignore robots.txt restrictions
+        proxy_url: Optional proxy URL to use for requests
     """
     server = Server("mcp-fetch")
     user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS
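serve is the public entry point, so this keyword is where an embedder or launcher would surface the option. One way a CLI wrapper might wire it through, sketched under the assumption of an argparse-based main and a --proxy-url flag (neither appears in this diff):

import asyncio
from argparse import ArgumentParser

from mcp_server_fetch.server import serve  # import path is an assumption

def main() -> None:
    parser = ArgumentParser(description="MCP fetch server")
    parser.add_argument("--user-agent", type=str, help="custom User-Agent string")
    parser.add_argument(
        "--ignore-robots-txt",
        action="store_true",
        help="skip robots.txt checks for autonomous fetches",
    )
    parser.add_argument(
        "--proxy-url",
        type=str,
        default=None,
        help="proxy URL for all outbound requests",
    )
    args = parser.parse_args()
    asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url))

if __name__ == "__main__":
    main()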
@@ -229,10 +232,10 @@ async def call_tool(name, arguments: dict) -> list[TextContent]:
             raise McpError(ErrorData(code=INVALID_PARAMS, message="URL is required"))

         if not ignore_robots_txt:
-            await check_may_autonomously_fetch_url(url, user_agent_autonomous)
+            await check_may_autonomously_fetch_url(url, user_agent_autonomous, proxy_url)

         content, prefix = await fetch_url(
-            url, user_agent_autonomous, force_raw=args.raw
+            url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url
         )
         original_length = len(content)
         if args.start_index >= original_length:
@@ -259,7 +262,7 @@ async def get_prompt(name: str, arguments: dict | None) -> GetPromptResult:
         url = arguments["url"]

         try:
-            content, prefix = await fetch_url(url, user_agent_manual)
+            content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url)
             # TODO: after SDK bug is addressed, don't catch the exception
         except McpError as e:
             return GetPromptResult(
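Taken together, one optional setting now flows from serve down to every outbound request: the robots.txt check, the autonomous fetch in call_tool, and the manual fetch in get_prompt. One compatibility caveat worth noting: httpx deprecated the `proxies` keyword in 0.26 in favor of `proxy` (with `mounts` for per-scheme routing), and later releases drop it entirely, so if the project tracks current httpx the client construction would instead look like:

from httpx import AsyncClient

async def open_client(proxy_url: str | None = None) -> None:
    # On httpx >= 0.26 the keyword is `proxy`; None still means a
    # direct connection, mirroring the diff's behavior.
    async with AsyncClient(proxy=proxy_url) as client:
        ...  # same request logic as above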