1717 INTERNAL_ERROR ,
1818)
1919from protego import Protego
20- from pydantic import BaseModel , Field
20+ from pydantic import BaseModel , Field , ValidationError
2121
2222DEFAULT_USER_AGENT_AUTONOMOUS = "ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)"
2323DEFAULT_USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)"
@@ -89,7 +89,10 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str):
8989 )
9090
9191
92- async def fetch_url (url : str , user_agent : str ) -> str :
92+ async def fetch_url (url : str , user_agent : str ) -> (str , str ):
93+ """
94+ Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
95+ """
9396 from httpx import AsyncClient , HTTPError
9497
9598 async with AsyncClient () as client :
@@ -109,13 +112,14 @@ async def fetch_url(url: str, user_agent: str) -> str:
109112
110113 content_type = response .headers .get ("content-type" , "" )
111114 if "<html" in page_raw [:100 ] or "text/html" in content_type or not content_type :
112- return extract_content_from_html (page_raw )
115+ return extract_content_from_html (page_raw ), ""
113116
114- return f"Content type { content_type } cannot be simplified to markdown, but here is the raw content:\n { page_raw } "
117+ return page_raw , f"Content type { content_type } cannot be simplified to markdown, but here is the raw content:\n "
115118
116119
# Parameters accepted by the fetch tool.
class Fetch(BaseModel):
    # Address of the page to retrieve.
    url: str = Field(..., description="URL to fetch")
    # Size of the window returned to the caller; must be positive so that a
    # window is never empty or computed from a negative length.
    max_length: int = Field(
        5000,
        gt=0,
        description="Maximum number of characters to return.",
    )
    # Offset into the fetched content; must be non-negative so that slicing
    # never wraps around to the tail of the content.
    start_index: int = Field(
        0,
        ge=0,
        description="On return output starting at this character index, useful if a previous fetch was truncated and more context is required.",
    )
120124
121125
@@ -154,15 +158,23 @@ async def list_prompts() -> list[Prompt]:
154158
@server.call_tool()
async def call_tool(name, arguments: dict) -> list[TextContent]:
    """Handle a fetch tool call and return one window of the page content.

    The arguments are validated through the ``Fetch`` model. The returned
    text is the slice ``[start_index, start_index + max_length)`` of the
    fetched content, preceded by the status prefix from ``fetch_url``.
    A truncation notice is appended only when content remains beyond the
    returned window.

    Raises:
        McpError: with ``INVALID_PARAMS`` when the arguments fail validation,
            the URL is empty, or the window bounds are out of range.
    """
    try:
        # ``arguments`` may be absent; treat that as "no arguments" so the
        # model reports the missing URL instead of ``**None`` raising TypeError.
        args = Fetch(**(arguments or {}))
    except ValueError as e:
        raise McpError(INVALID_PARAMS, str(e)) from e

    url = args.url
    if not url:
        raise McpError(INVALID_PARAMS, "URL is required")

    start = args.start_index
    if start < 0:
        # A negative offset would silently slice from the end of the content.
        raise McpError(INVALID_PARAMS, "start_index must not be negative")
    if args.max_length <= 0:
        raise McpError(INVALID_PARAMS, "max_length must be positive")

    if not ignore_robots_txt:
        await check_may_autonomously_fetch_url(url, user_agent_autonomous)

    content, prefix = await fetch_url(url, user_agent_autonomous)

    total = len(content)
    end = start + args.max_length
    if total > 0 and start >= total:
        # The caller paged past the end; say so instead of returning an empty
        # body with a misleading "truncated" notice.
        content = "<error>No more content available.</error>"
    else:
        # Always honour start_index, even when the content is shorter than
        # max_length.
        content = content[start:end]
        if end < total:
            # Only advertise a follow-up call when something is left to read.
            content += f"\n\n<error>Content truncated. Call the fetch tool with a start_index of {end} to get more content.</error>"
    return [TextContent(type="text", text=f"{prefix}Contents of {url}:\n{content}")]
166178
167179 @server .get_prompt ()
168180 async def get_prompt (name : str , arguments : dict | None ) -> GetPromptResult :
@@ -172,7 +184,7 @@ async def get_prompt(name: str, arguments: dict | None) -> GetPromptResult:
172184 url = arguments ["url" ]
173185
174186 try :
175- content = await fetch_url (url , user_agent_manual )
187+ content , prefix = await fetch_url (url , user_agent_manual )
176188 # TODO: after SDK bug is addressed, don't catch the exception
177189 except McpError as e :
178190 return GetPromptResult (
@@ -188,7 +200,7 @@ async def get_prompt(name: str, arguments: dict | None) -> GetPromptResult:
188200 description = f"Contents of { url } " ,
189201 messages = [
190202 PromptMessage (
191- role = "user" , content = TextContent (type = "text" , text = content )
203+ role = "user" , content = TextContent (type = "text" , text = prefix + content )
192204 )
193205 ],
194206 )
0 commit comments