 from mcp.server import Server
 from mcp.server.stdio import stdio_server
 from mcp.types import (
+    ErrorData,
     GetPromptResult,
     Prompt,
     PromptArgument,
@@ -79,15 +80,15 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
                 headers={"User-Agent": user_agent},
             )
         except HTTPError:
-            raise McpError(
-                INTERNAL_ERROR,
-                f"Failed to fetch robots.txt {robot_txt_url} due to a connection issue",
-            )
+            raise McpError(ErrorData(
+                code=INTERNAL_ERROR,
+                message=f"Failed to fetch robots.txt {robot_txt_url} due to a connection issue",
+            ))
         if response.status_code in (401, 403):
-            raise McpError(
-                INTERNAL_ERROR,
-                f"When fetching robots.txt ({robot_txt_url}), received status {response.status_code} so assuming that autonomous fetching is not allowed, the user can try manually fetching by using the fetch prompt",
-            )
+            raise McpError(ErrorData(
+                code=INTERNAL_ERROR,
+                message=f"When fetching robots.txt ({robot_txt_url}), received status {response.status_code} so assuming that autonomous fetching is not allowed, the user can try manually fetching by using the fetch prompt",
+            ))
         elif 400 <= response.status_code < 500:
             return
         robot_txt = response.text
@@ -96,15 +97,15 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
     )
     robot_parser = Protego.parse(processed_robot_txt)
     if not robot_parser.can_fetch(str(url), user_agent):
-        raise McpError(
-            INTERNAL_ERROR,
-            f"The sites robots.txt ({robot_txt_url}), specifies that autonomous fetching of this page is not allowed, "
+        raise McpError(ErrorData(
+            code=INTERNAL_ERROR,
+            message=f"The sites robots.txt ({robot_txt_url}), specifies that autonomous fetching of this page is not allowed, "
             f"<useragent>{user_agent}</useragent>\n"
             f"<url>{url}</url>"
             f"<robots>\n{robot_txt}\n</robots>\n"
             f"The assistant must let the user know that it failed to view the page. The assistant may provide further guidance based on the above information.\n"
             f"The assistant can tell the user that they can try manually fetching the page by using the fetch prompt within their UI.",
-        )
+        ))


 async def fetch_url(
@@ -124,12 +125,12 @@ async def fetch_url(
                 timeout=30,
             )
         except HTTPError as e:
-            raise McpError(INTERNAL_ERROR, f"Failed to fetch {url}: {e!r}")
+            raise McpError(ErrorData(code=INTERNAL_ERROR, message=f"Failed to fetch {url}: {e!r}"))
         if response.status_code >= 400:
-            raise McpError(
-                INTERNAL_ERROR,
-                f"Failed to fetch {url} - status code {response.status_code}",
-            )
+            raise McpError(ErrorData(
+                code=INTERNAL_ERROR,
+                message=f"Failed to fetch {url} - status code {response.status_code}",
+            ))

         page_raw = response.text

@@ -221,27 +222,39 @@ async def call_tool(name, arguments: dict) -> list[TextContent]:
         try:
             args = Fetch(**arguments)
         except ValueError as e:
-            raise McpError(INVALID_PARAMS, str(e))
+            raise McpError(ErrorData(code=INVALID_PARAMS, message=str(e)))

         url = str(args.url)
         if not url:
-            raise McpError(INVALID_PARAMS, "URL is required")
+            raise McpError(ErrorData(code=INVALID_PARAMS, message="URL is required"))

         if not ignore_robots_txt:
             await check_may_autonomously_fetch_url(url, user_agent_autonomous)

         content, prefix = await fetch_url(
             url, user_agent_autonomous, force_raw=args.raw
         )
-        if len(content) > args.max_length:
-            content = content[args.start_index : args.start_index + args.max_length]
-            content += f"\n\n<error>Content truncated. Call the fetch tool with a start_index of {args.start_index + args.max_length} to get more content.</error>"
+        original_length = len(content)
+        if args.start_index >= original_length:
+            content = "<error>No more content available.</error>"
+        else:
+            truncated_content = content[args.start_index : args.start_index + args.max_length]
+            if not truncated_content:
+                content = "<error>No more content available.</error>"
+            else:
+                content = truncated_content
+                actual_content_length = len(truncated_content)
+                remaining_content = original_length - (args.start_index + actual_content_length)
+                # Only add the prompt to continue fetching if there is still remaining content
+                if actual_content_length == args.max_length and remaining_content > 0:
+                    next_start = args.start_index + actual_content_length
+                    content += f"\n\n<error>Content truncated. Call the fetch tool with a start_index of {next_start} to get more content.</error>"
         return [TextContent(type="text", text=f"{prefix}Contents of {url}:\n{content}")]

     @server.get_prompt()
     async def get_prompt(name: str, arguments: dict | None) -> GetPromptResult:
         if not arguments or "url" not in arguments:
-            raise McpError(INVALID_PARAMS, "URL is required")
+            raise McpError(ErrorData(code=INVALID_PARAMS, message="URL is required"))

         url = arguments["url"]
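
Note on the error-construction change: in current versions of the mcp Python SDK, McpError is built from a single ErrorData payload rather than positional (code, message) arguments, which is why every raise site in this diff is rewritten the same way. A minimal sketch of the new pattern, assuming McpError is importable from mcp.shared.exceptions as in the SDK (the require_url helper is hypothetical, for illustration only):

from mcp.shared.exceptions import McpError
from mcp.types import INVALID_PARAMS, ErrorData


def require_url(url: str) -> None:
    # New style: the error code and message travel inside an ErrorData
    # payload; McpError no longer takes them as positional arguments.
    if not url:
        raise McpError(ErrorData(code=INVALID_PARAMS, message="URL is required"))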
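The rewritten truncation branch fixes two problems with the old "if len(content) > args.max_length" logic: a start_index at or past the end of the page now returns an explicit "no more content" marker instead of an empty slice, and the "Content truncated" continuation hint is appended only when content actually remains, so a page that ends exactly on the max_length boundary no longer advertises a bogus next start_index. A standalone sketch of the same logic (the paginate helper is hypothetical, extracted here only to make the edge cases testable):

def paginate(content: str, start_index: int, max_length: int) -> str:
    # Mirrors the diff's truncation branch as a pure function.
    original_length = len(content)
    if start_index >= original_length:
        return "<error>No more content available.</error>"
    chunk = content[start_index : start_index + max_length]
    if not chunk:
        return "<error>No more content available.</error>"
    remaining = original_length - (start_index + len(chunk))
    # Only advertise a continuation when content actually remains.
    if len(chunk) == max_length and remaining > 0:
        next_start = start_index + len(chunk)
        chunk += (
            f"\n\n<error>Content truncated. Call the fetch tool with a "
            f"start_index of {next_start} to get more content.</error>"
        )
    return chunk


assert paginate("abcdef", 0, 4).startswith("abcd")    # more content follows
assert "No more content" in paginate("abcdef", 6, 4)  # start_index past the end
assert paginate("abcd", 0, 4) == "abcd"               # exact fit, no bogus hint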