Skip to content

Commit ed7d881

Browse files
authored
Merge branch 'main' into patch-2
2 parents 4ecc937 + e181222 commit ed7d881

File tree

7 files changed

+147
-24
lines changed

7 files changed

+147
-24
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ Official integrations are maintained by companies building production ready MCP
7373
- <img height="12" width="12" src="https://cdn.prod.website-files.com/6605a2979ff17b2cd1939cd4/6605a460de47e7596ed84f06_icon256.png" alt="gotoHuman Logo" /> **[gotoHuman](https://github.com/gotohuman/gotohuman-mcp-server)** - Human-in-the-loop platform - Allow AI agents and automations to send requests for approval to your [gotoHuman](https://www.gotohuman.com) inbox.
7474
- <img height="12" width="12" src="https://grafana.com/favicon.ico" alt="Grafana Logo" /> **[Grafana](https://github.com/grafana/mcp-grafana)** - Search dashboards, investigate incidents and query datasources in your Grafana instance
7575
- <img height="12" width="12" src="https://framerusercontent.com/images/KCOWBYLKunDff1Dr452y6EfjiU.png" alt="Graphlit Logo" /> **[Graphlit](https://github.com/graphlit/graphlit-mcp-server)** - Ingest anything from Slack to Gmail to podcast feeds, in addition to web crawling, into a searchable [Graphlit](https://www.graphlit.com) project.
76+
- <img height="12" width="12" src="https://greptime.com/favicon.ico" alt="Greptime Logo" /> **[GreptimeDB](https://github.com/GreptimeTeam/greptimedb-mcp-server)** - Provides AI assistants with a secure and structured way to explore and analyze data in [GreptimeDB](https://github.com/GreptimeTeam/greptimedb).
7677
- <img height="12" width="12" src="https://img.alicdn.com/imgextra/i3/O1CN01d9qrry1i6lTNa2BRa_!!6000000004364-2-tps-218-200.png" alt="Hologres Logo" /> **[Hologres](https://github.com/aliyun/alibabacloud-hologres-mcp-server)** - Connect to a [Hologres](https://www.alibabacloud.com/en/product/hologres) instance, get table metadata, query and analyze data.
7778
- <img height="12" width="12" src="https://hyperbrowser-assets-bucket.s3.us-east-1.amazonaws.com/Hyperbrowser-logo.png" alt="Hyperbrowser Logo" /> **[Hyperbrowser](https://github.com/hyperbrowserai/mcp)** - [Hyperbrowser](https://www.hyperbrowser.ai/) is the next-generation platform empowering AI agents and enabling effortless, scalable browser automation.
7879
- **[IBM wxflows](https://github.com/IBM/wxflows/tree/main/examples/mcp/javascript)** - Tool platform by IBM to build, test and deploy tools for any data source

src/fetch/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,10 @@ ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotoc
107107

108108
This can be customized by adding the argument `--user-agent=YourUserAgent` to the `args` list in the configuration.
109109

110+
### Customization - Proxy
111+
112+
The server can be configured to use a proxy by using the `--proxy-url` argument.
113+
110114
## Debugging
111115

112116
You can use the MCP inspector to debug the server. For uvx installations:

src/fetch/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "mcp-server-fetch"
3-
version = "0.6.2"
3+
version = "0.6.3"
44
description = "A Model Context Protocol server providing tools to fetch and convert web content for usage by LLMs"
55
readme = "README.md"
66
requires-python = ">=3.10"
@@ -16,6 +16,7 @@ classifiers = [
1616
"Programming Language :: Python :: 3.10",
1717
]
1818
dependencies = [
19+
"httpx<0.28",
1920
"markdownify>=0.13.1",
2021
"mcp>=1.1.3",
2122
"protego>=0.3.1",

src/fetch/src/mcp_server_fetch/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,10 @@ def main():
1515
action="store_true",
1616
help="Ignore robots.txt restrictions",
1717
)
18+
parser.add_argument("--proxy-url", type=str, help="Proxy URL to use for requests")
1819

1920
args = parser.parse_args()
20-
asyncio.run(serve(args.user_agent, args.ignore_robots_txt))
21+
asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url))
2122

2223

2324
if __name__ == "__main__":

src/fetch/src/mcp_server_fetch/server.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def get_robots_txt_url(url: str) -> str:
6363
return robots_url
6464

6565

66-
async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
66+
async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url: str | None = None) -> None:
6767
"""
6868
Check if the URL can be fetched by the user agent according to the robots.txt file.
6969
Raises a McpError if not.
@@ -72,7 +72,7 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
7272

7373
robot_txt_url = get_robots_txt_url(url)
7474

75-
async with AsyncClient() as client:
75+
async with AsyncClient(proxies=proxy_url) as client:
7676
try:
7777
response = await client.get(
7878
robot_txt_url,
@@ -109,14 +109,14 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
109109

110110

111111
async def fetch_url(
112-
url: str, user_agent: str, force_raw: bool = False
112+
url: str, user_agent: str, force_raw: bool = False, proxy_url: str | None = None
113113
) -> Tuple[str, str]:
114114
"""
115115
Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
116116
"""
117117
from httpx import AsyncClient, HTTPError
118118

119-
async with AsyncClient() as client:
119+
async with AsyncClient(proxies=proxy_url) as client:
120120
try:
121121
response = await client.get(
122122
url,
@@ -173,19 +173,22 @@ class Fetch(BaseModel):
173173
bool,
174174
Field(
175175
default=False,
176-
description="Get the actual HTML content if the requested page, without simplification.",
176+
description="Get the actual HTML content of the requested page, without simplification.",
177177
),
178178
]
179179

180180

181181
async def serve(
182-
custom_user_agent: str | None = None, ignore_robots_txt: bool = False
182+
custom_user_agent: str | None = None,
183+
ignore_robots_txt: bool = False,
184+
proxy_url: str | None = None,
183185
) -> None:
184186
"""Run the fetch MCP server.
185187
186188
Args:
187189
custom_user_agent: Optional custom User-Agent string to use for requests
188190
ignore_robots_txt: Whether to ignore robots.txt restrictions
191+
proxy_url: Optional proxy URL to use for requests
189192
"""
190193
server = Server("mcp-fetch")
191194
user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS
@@ -229,10 +232,10 @@ async def call_tool(name, arguments: dict) -> list[TextContent]:
229232
raise McpError(ErrorData(code=INVALID_PARAMS, message="URL is required"))
230233

231234
if not ignore_robots_txt:
232-
await check_may_autonomously_fetch_url(url, user_agent_autonomous)
235+
await check_may_autonomously_fetch_url(url, user_agent_autonomous, proxy_url)
233236

234237
content, prefix = await fetch_url(
235-
url, user_agent_autonomous, force_raw=args.raw
238+
url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url
236239
)
237240
original_length = len(content)
238241
if args.start_index >= original_length:
@@ -259,7 +262,7 @@ async def get_prompt(name: str, arguments: dict | None) -> GetPromptResult:
259262
url = arguments["url"]
260263

261264
try:
262-
content, prefix = await fetch_url(url, user_agent_manual)
265+
content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url)
263266
# TODO: after SDK bug is addressed, don't catch the exception
264267
except McpError as e:
265268
return GetPromptResult(

src/puppeteer/README.md

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@ A Model Context Protocol server that provides browser automation capabilities us
88

99
- **puppeteer_navigate**
1010
- Navigate to any URL in the browser
11-
- Input: `url` (string)
11+
- Inputs:
12+
- `url` (string, required): URL to navigate to
13+
- `launchOptions` (object, optional): PuppeteerJS LaunchOptions. Default null. If changed and not null, browser restarts. Example: `{ headless: true, args: ['--user-data-dir="C:/Data"'] }`
14+
- `allowDangerous` (boolean, optional): Allow dangerous LaunchOptions that reduce security. When false, dangerous args like `--no-sandbox`, `--disable-web-security` will throw errors. Default false.
1215

1316
- **puppeteer_screenshot**
1417
- Capture screenshots of the entire page or specific elements
@@ -61,6 +64,7 @@ The server provides access to two types of resources:
6164
- Screenshot capabilities
6265
- JavaScript execution
6366
- Basic web interaction (navigation, clicking, form filling)
67+
- Customizable Puppeteer launch options
6468

6569
## Configuration to use Puppeteer Server
6670
Here's the Claude Desktop configuration to use the Puppeteer server:
@@ -93,6 +97,39 @@ Here's the Claude Desktop configuration to use the Puppeter server:
9397
}
9498
```
9599

100+
### Launch Options
101+
102+
You can customize Puppeteer's browser behavior in two ways:
103+
104+
1. **Environment Variable**: Set `PUPPETEER_LAUNCH_OPTIONS` with a JSON-encoded string in the MCP configuration's `env` parameter:
105+
106+
```json
107+
{
108+
"mcpServers": {
109+
"mcp-puppeteer": {
110+
"command": "npx",
111+
"args": ["-y", "@modelcontextprotocol/server-puppeteer"],
112+
"env": {
113+
"PUPPETEER_LAUNCH_OPTIONS": "{ \"headless\": false, \"executablePath\": \"C:/Program Files/Google/Chrome/Application/chrome.exe\", \"args\": [] }",
114+
"ALLOW_DANGEROUS": "true"
115+
}
116+
}
117+
}
118+
}
119+
```
120+
121+
2. **Tool Call Arguments**: Pass `launchOptions` and `allowDangerous` parameters to the `puppeteer_navigate` tool:
122+
123+
```json
124+
{
125+
"url": "https://example.com",
126+
"launchOptions": {
127+
"headless": false,
128+
"defaultViewport": {"width": 1280, "height": 720}
129+
}
130+
}
131+
```
132+
96133
## Build
97134

98135
Docker build:
@@ -103,4 +140,4 @@ docker build -t mcp/puppeteer -f src/puppeteer/Dockerfile .
103140

104141
## License
105142

106-
This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the LICENSE file in the project repository.
143+
This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the LICENSE file in the project repository.

src/puppeteer/index.ts

Lines changed: 87 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@ const TOOLS: Tool[] = [
2222
inputSchema: {
2323
type: "object",
2424
properties: {
25-
url: { type: "string" },
25+
url: { type: "string", description: "URL to navigate to" },
26+
launchOptions: { type: "object", description: "PuppeteerJS LaunchOptions. Default null. If changed and not null, browser restarts. Example: { headless: true, args: ['--no-sandbox'] }" },
27+
allowDangerous: { type: "boolean", description: "Allow dangerous LaunchOptions that reduce security. When false, dangerous args like --no-sandbox will throw errors. Default false." },
2628
},
2729
required: ["url"],
2830
},
@@ -101,16 +103,65 @@ const TOOLS: Tool[] = [
101103
];
102104

103105
// Global state
104-
let browser: Browser | undefined;
105-
let page: Page | undefined;
106+
let browser: Browser | null;
107+
let page: Page | null;
106108
const consoleLogs: string[] = [];
107109
const screenshots = new Map<string, string>();
110+
let previousLaunchOptions: any = null;
111+
112+
async function ensureBrowser({ launchOptions, allowDangerous }: any) {
113+
114+
const DANGEROUS_ARGS = [
115+
'--no-sandbox',
116+
'--disable-setuid-sandbox',
117+
'--single-process',
118+
'--disable-web-security',
119+
'--ignore-certificate-errors',
120+
'--disable-features=IsolateOrigins',
121+
'--disable-site-isolation-trials',
122+
'--allow-running-insecure-content'
123+
];
124+
125+
// Parse environment config safely
126+
let envConfig = {};
127+
try {
128+
envConfig = JSON.parse(process.env.PUPPETEER_LAUNCH_OPTIONS || '{}');
129+
} catch (error: any) {
130+
console.warn('Failed to parse PUPPETEER_LAUNCH_OPTIONS:', error?.message || error);
131+
}
132+
133+
// Deep merge environment config with user-provided options
134+
const mergedConfig = deepMerge(envConfig, launchOptions || {});
135+
136+
// Security validation for merged config
137+
if (mergedConfig?.args) {
138+
const dangerousArgs = mergedConfig.args?.filter?.((arg: string) => DANGEROUS_ARGS.some((dangerousArg: string) => arg.startsWith(dangerousArg)));
139+
if (dangerousArgs?.length > 0 && !(allowDangerous || (process.env.ALLOW_DANGEROUS === 'true'))) {
140+
throw new Error(`Dangerous browser arguments detected: ${dangerousArgs.join(', ')}. Fround from environment variable and tool call argument. ` +
141+
'Set allowDangerous: true in the tool call arguments to override.');
142+
}
143+
}
144+
145+
try {
146+
if ((browser && !browser.connected) ||
147+
(launchOptions && (JSON.stringify(launchOptions) != JSON.stringify(previousLaunchOptions)))) {
148+
await browser?.close();
149+
browser = null;
150+
}
151+
}
152+
catch (error) {
153+
browser = null;
154+
}
155+
156+
previousLaunchOptions = launchOptions;
108157

109-
async function ensureBrowser() {
110158
if (!browser) {
111159
const npx_args = { headless: false }
112160
const docker_args = { headless: true, args: ["--no-sandbox", "--single-process", "--no-zygote"] }
113-
browser = await puppeteer.launch(process.env.DOCKER_CONTAINER ? docker_args : npx_args);
161+
browser = await puppeteer.launch(deepMerge(
162+
process.env.DOCKER_CONTAINER ? docker_args : npx_args,
163+
mergedConfig
164+
));
114165
const pages = await browser.pages();
115166
page = pages[0];
116167

@@ -126,6 +177,31 @@ async function ensureBrowser() {
126177
return page!;
127178
}
128179

180+
// Deep merge utility function
181+
function deepMerge(target: any, source: any): any {
182+
const output = Object.assign({}, target);
183+
if (typeof target !== 'object' || typeof source !== 'object') return source;
184+
185+
for (const key of Object.keys(source)) {
186+
const targetVal = target[key];
187+
const sourceVal = source[key];
188+
if (Array.isArray(targetVal) && Array.isArray(sourceVal)) {
189+
// Deduplicate args/ignoreDefaultArgs, prefer source values
190+
output[key] = [...new Set([
191+
...(key === 'args' || key === 'ignoreDefaultArgs' ?
192+
targetVal.filter((arg: string) => !sourceVal.some((launchArg: string) => arg.startsWith('--') && launchArg.startsWith(arg.split('=')[0]))) :
193+
targetVal),
194+
...sourceVal
195+
])];
196+
} else if (sourceVal instanceof Object && key in target) {
197+
output[key] = deepMerge(targetVal, sourceVal);
198+
} else {
199+
output[key] = sourceVal;
200+
}
201+
}
202+
return output;
203+
}
204+
129205
declare global {
130206
interface Window {
131207
mcpHelper: {
@@ -136,7 +212,7 @@ declare global {
136212
}
137213

138214
async function handleToolCall(name: string, args: any): Promise<CallToolResult> {
139-
const page = await ensureBrowser();
215+
const page = await ensureBrowser(args);
140216

141217
switch (name) {
142218
case "puppeteer_navigate":
@@ -285,15 +361,15 @@ async function handleToolCall(name: string, args: any): Promise<CallToolResult>
285361
window.mcpHelper.logs.push(`[${method}] ${args.join(' ')}`);
286362
(window.mcpHelper.originalConsole as any)[method](...args);
287363
};
288-
} );
289-
} );
364+
});
365+
});
290366

291-
const result = await page.evaluate( args.script );
367+
const result = await page.evaluate(args.script);
292368

293369
const logs = await page.evaluate(() => {
294370
Object.assign(console, window.mcpHelper.originalConsole);
295371
const logs = window.mcpHelper.logs;
296-
delete ( window as any).mcpHelper;
372+
delete (window as any).mcpHelper;
297373
return logs;
298374
});
299375

@@ -405,4 +481,4 @@ runServer().catch(console.error);
405481
process.stdin.on("close", () => {
406482
console.error("Puppeteer MCP Server closed");
407483
server.close();
408-
});
484+
});

0 commit comments

Comments
 (0)