17
17
INTERNAL_ERROR ,
18
18
)
19
19
from protego import Protego
20
- from pydantic import BaseModel , Field
20
+ from pydantic import BaseModel , Field , ValidationError
21
21
22
22
DEFAULT_USER_AGENT_AUTONOMOUS = "ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)"
23
23
DEFAULT_USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)"
@@ -89,7 +89,10 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str):
89
89
)
90
90
91
91
92
- async def fetch_url (url : str , user_agent : str ) -> str :
92
+ async def fetch_url (url : str , user_agent : str ) -> (str , str ):
93
+ """
94
+ Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
95
+ """
93
96
from httpx import AsyncClient , HTTPError
94
97
95
98
async with AsyncClient () as client :
@@ -109,13 +112,14 @@ async def fetch_url(url: str, user_agent: str) -> str:
109
112
110
113
content_type = response .headers .get ("content-type" , "" )
111
114
if "<html" in page_raw [:100 ] or "text/html" in content_type or not content_type :
112
- return extract_content_from_html (page_raw )
115
+ return extract_content_from_html (page_raw ), ""
113
116
114
- return f"Content type { content_type } cannot be simplified to markdown, but here is the raw content:\n { page_raw } "
117
+ return page_raw , f"Content type { content_type } cannot be simplified to markdown, but here is the raw content:\n "
115
118
116
119
117
120
class Fetch(BaseModel):
    """Arguments accepted by the fetch tool."""

    url: str = Field(..., description="URL to fetch")
    # Must be positive: a zero or negative window would make the
    # pagination slice empty or wrap around via negative indexing.
    max_length: int = Field(
        5000,
        gt=0,
        description="Maximum number of characters to return.",
    )
    # Must be non-negative: a negative index would be interpreted by Python
    # slicing as an offset from the end of the content.
    start_index: int = Field(
        0,
        ge=0,
        description="On return output starting at this character index, useful if a previous fetch was truncated and more context is required.",
    )
120
124
121
125
@@ -154,15 +158,23 @@ async def list_prompts() -> list[Prompt]:
154
158
155
159
@server.call_tool()
async def call_tool(name, arguments: dict) -> list[TextContent]:
    """Fetch a URL and return one window of its content.

    ``arguments`` is validated through the ``Fetch`` model. The fetched
    content is sliced to ``[start_index, start_index + max_length)``; when
    more content remains after that window, a notice tells the caller which
    ``start_index`` to request next.

    Raises:
        McpError: with ``INVALID_PARAMS`` when the arguments fail validation,
            the URL is empty, or the paging values are out of range.
    """
    try:
        args = Fetch(**arguments)
    except ValueError as e:
        # Keep the validation error chained for easier debugging.
        raise McpError(INVALID_PARAMS, str(e)) from e

    url = args.url
    if not url:
        raise McpError(INVALID_PARAMS, "URL is required")

    # Guard the slice arithmetic below: a negative start would index from the
    # end of the content and a non-positive length can never make progress.
    if args.start_index < 0 or args.max_length <= 0:
        raise McpError(
            INVALID_PARAMS,
            "start_index must be >= 0 and max_length must be > 0",
        )

    if not ignore_robots_txt:
        await check_may_autonomously_fetch_url(url, user_agent_autonomous)

    content, prefix = await fetch_url(url, user_agent_autonomous)

    total_length = len(content)
    start = args.start_index
    end = start + args.max_length

    if start > 0 and start >= total_length:
        # The caller paged past the end; say so instead of returning an
        # empty body with a misleading "truncated" notice.
        content = "<error>No more content available.</error>"
    else:
        # Always honour start_index, even when the whole page is shorter
        # than max_length.
        content = content[start:end]
        if end < total_length:
            # Only advertise a continuation when content actually remains.
            content += f"\n\n<error>Content truncated. Call the fetch tool with a start_index of {end} to get more content.</error>"

    return [TextContent(type="text", text=f"{prefix}Contents of {url}:\n{content}")]
166
178
167
179
@server .get_prompt ()
168
180
async def get_prompt (name : str , arguments : dict | None ) -> GetPromptResult :
@@ -172,7 +184,7 @@ async def get_prompt(name: str, arguments: dict | None) -> GetPromptResult:
172
184
url = arguments ["url" ]
173
185
174
186
try :
175
- content = await fetch_url (url , user_agent_manual )
187
+ content , prefix = await fetch_url (url , user_agent_manual )
176
188
# TODO: after SDK bug is addressed, don't catch the exception
177
189
except McpError as e :
178
190
return GetPromptResult (
@@ -188,7 +200,7 @@ async def get_prompt(name: str, arguments: dict | None) -> GetPromptResult:
188
200
description = f"Contents of { url } " ,
189
201
messages = [
190
202
PromptMessage (
191
- role = "user" , content = TextContent (type = "text" , text = content )
203
+ role = "user" , content = TextContent (type = "text" , text = prefix + content )
192
204
)
193
205
],
194
206
)
0 commit comments