diff --git a/.env.example b/.env.example index fc7f5e5..d9b39d9 100644 --- a/.env.example +++ b/.env.example @@ -2,4 +2,10 @@ CHROME_PATH= # OpenAI API key for OpenAI model access -OPENAI_API_KEY=your-api-key-here \ No newline at end of file +OPENAI_API_KEY=your-api-key-here + +# Set to true if you want api calls to wait for tasks to complete (default is false) +PATIENT=false + +# Set to false to disable anonymous telemetry (browser-use enables it by default) +ANONYMIZED_TELEMETRY=false \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..392a205 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,30 @@ +--- +name: Bug Report +about: Create a report to help us improve +title: "[BUG] " +labels: bug +assignees: "" +--- + +**Describe the bug** A clear and concise description of what the bug is. + +**To Reproduce** Steps to reproduce the behavior: + +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** A clear and concise description of what you expected to +happen. + +**Screenshots** If applicable, add screenshots to help explain your problem. + +**Environment (please complete the following information):** + +- OS: [e.g. macOS, Windows, Linux] + - Version [e.g. 1.2.3] +- Client [if applicable] + - Version [e.g. 1.2.3] + +**Additional context** Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..bbcc2bb --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,19 @@ +--- +name: Feature Request +about: Suggest an idea for this project +title: "[FEATURE] " +labels: enhancement +assignees: "" +--- + +**Is your feature request related to a problem? Please describe.** A clear and +concise description of what the problem is. Ex. I'm always frustrated when [...] 
+ +**Describe the solution you'd like** A clear and concise description of what you +want to happen. + +**Describe alternatives you've considered** A clear and concise description of +any alternative solutions or features you've considered. + +**Additional context** Add any other context or screenshots about the feature +request here. diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..4af3d37 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,31 @@ +## Description + +Please include a summary of the changes and which issue is fixed. Please also +include relevant motivation and context. + +Fixes # (issue) + +## Type of change + +Please delete options that are not relevant. + +- [ ] Bug fix (non-breaking change which fixes an issue) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to + not work as expected) +- [ ] Documentation update + +## How Has This Been Tested? + +Please describe the tests that you ran to verify your changes. Provide +instructions so we can reproduce. 
+ +## Checklist: + +- [ ] My code follows the style guidelines of this project +- [ ] I have performed a self-review of my code +- [ ] I have commented my code, particularly in hard-to-understand areas +- [ ] I have made corresponding changes to the documentation +- [ ] My changes generate no new warnings +- [ ] I have added tests that prove my fix is effective or that my feature works +- [ ] New and existing unit tests pass locally with my changes (if applicable) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..6b3061c --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,132 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +- Demonstrating empathy and kindness toward other people +- Being respectful of differing opinions, viewpoints, and experiences +- Giving and gracefully accepting constructive feedback +- Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +- Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +- The use of sexualized language or imagery, and sexual attention or advances of + any kind +- Trolling, insulting or derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or email address, + without their explicit permission +- Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official email address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at [info at +cobrowser.xyz]. All complaints will be reviewed and investigated promptly and +fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. 
Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. + +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..8280aa9 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,99 @@ +# Contributing to browser-use MCP Server + +First off, thank you for considering contributing to browser-use MCP Server! +This project is released under the MIT License, which means your contributions +will also be covered under the same permissive license. 
+ +### Table of Contents + +- [Code of Conduct](#code-of-conduct) +- [Getting Started](#getting-started) +- [How to Contribute](#how-to-contribute) + - [Guidelines for Non-Code Contributions](#guidelines-for-non-code-contributions) + - [Reporting Bugs](#reporting-bugs) + - [Suggesting Enhancements](#suggesting-enhancements) + - [Pull Requests](#pull-requests) +- [Development Process](#development-process) +- [License](#license) + +## Code of Conduct + +We have adopted a Code of Conduct that we expect project participants to adhere +to. Please read [the full text](CODE_OF_CONDUCT.md) so that you can understand +what actions will and will not be tolerated. + +## Getting Started + +### Fork-based workflow (recommended as a playground) + +1. Fork the repository +2. Clone your fork: + `git clone https://github.com/your-username/browser-use-mcp-server.git` +3. Create a new branch: `git checkout -b feature/your-feature-name` +4. Make your changes +5. Push to your fork: `git push origin feature/your-feature-name` +6. Open a Pull Request + +### Direct repository workflow (for contributors) + +1. Clone the repository directly: + `git clone https://github.com/co-browser/browser-use-mcp-server.git` +2. Create a new branch: `git checkout -b feature/your-feature-name` +3. Make your changes +4. Push to the repository: `git push origin feature/your-feature-name` +5. Open a Pull Request + +If you're interested in becoming a contributor, please reach out to the maintainers +after making a few successful contributions via issues and pull requests. + +## How to Contribute + +### Guidelines for Non-Code Contributions + +We appreciate your attention to detail. However, minor fixes like typos or +grammar corrections should not be submitted individually. Instead, create an +issue noting these corrections, and we'll batch them into larger updates. + +### Reporting Bugs + +We use GitHub issues to track bugs. 
Before creating a bug report: + +- Search existing + [Issues](https://github.com/co-browser/browser-use-mcp-server/issues) to + ensure it hasn't already been reported +- If you find a closed issue that seems to address your problem, open a new + issue and include a link to the original + +When submitting a bug report, please use our bug report template and include as +much detail as possible. + +### Suggesting Enhancements + +Enhancement suggestions are tracked through GitHub issues. Please use our +feature request template when suggesting enhancements. + +### Pull Requests + +- Follow our pull request template +- Include screenshots and animated GIFs in your pull request whenever possible +- Follow our coding conventions and style guidelines +- Write meaningful commit messages +- Update documentation as needed +- Add tests for new features +- Pull requests undergo automated checks, including build and linting + +## Development Process + +1. Pick an issue to work on or create a new one +2. Comment on the issue to let others know you're working on it +3. Create a branch with a descriptive name +4. Write your code following our style guidelines +5. Add tests for new functionality +6. Update documentation as needed +7. Submit a pull request +8. Respond to code review feedback + +## License + +By contributing to browser-use MCP Server, you agree that your contributions +will be licensed under the MIT License. See [LICENSE](LICENSE) for details. 
diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..12ac0e7 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 cobrowser.xyz + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 9c6bf23..02be82b 100644 --- a/README.md +++ b/README.md @@ -1,101 +1,71 @@ -# ➡️ browser-use mcp server +# browser-use-mcp-server + +
[![Twitter URL](https://img.shields.io/twitter/url/https/twitter.com/cobrowser.svg?style=social&label=Follow%20%40cobrowser)](https://x.com/cobrowser) [![PyPI version](https://badge.fury.io/py/browser-use-mcp-server.svg)](https://pypi.org/project/browser-use-mcp-server/) -[browser-use](https://github.com/browser-use/browser-use) MCP Server with SSE + -stdio transport +**An MCP server that enables AI agents to control web browsers using +[browser-use](https://github.com/browser-use/browser-use).** -### Requirements +
-- [uv](https://github.com/astral-sh/uv) -- [mcp-proxy](https://github.com/sparfenyuk/mcp-proxy) (for stdio) +## Prerequisites -``` -# 1. Install uv +- [uv](https://github.com/astral-sh/uv) - Fast Python package manager +- [Playwright](https://playwright.dev/) - Browser automation +- [mcp-proxy](https://github.com/sparfenyuk/mcp-proxy) - Required for stdio mode + +```bash +# Install prerequisites curl -LsSf https://astral.sh/uv/install.sh | sh -# 2. Install mcp-proxy pypi package via uv uv tool install mcp-proxy +uv tool update-shell ``` -### Quickstart +## Environment -Starting in SSE mode: +Create a `.env` file: -```bash -uv sync -uv pip install playwright -uv run playwright install --with-deps --no-shell chromium -uv run server --port 8000 +``` +OPENAI_API_KEY=your-api-key +CHROME_PATH=optional/path/to/chrome +PATIENT=false # Set to true if API calls should wait for task completion ``` -With stdio mode: +## Installation ```bash -# Run with stdio mode and specify a proxy port -uv run server --stdio --proxy-port 8001 - -# Or just stdio mode (random proxy port) -uv run server --stdio +# Install dependencies +uv sync +uv pip install playwright +uv run playwright install --with-deps --no-shell chromium ``` -- the .env requires the following: - -``` -OPENAI_API_KEY=[your api key] -CHROME_PATH=[only change this if you have a custom chrome build] -``` +## Usage -When building the docker image, you can use Docker secrets for VNC password: +### SSE Mode ```bash -# With Docker secrets (recommended for production) -echo "your-secure-password" > vnc_password.txt -docker run -v $(pwd)/vnc_password.txt:/run/secrets/vnc_password your-image-name - -# Or during development with the default password -docker build . 
+# Run directly from source +uv run server --port 8000 ``` -### Tools +### stdio Mode -- [x] SSE transport -- [x] stdio transport (via mcp-proxy) -- [x] browser_use - Initiates browser tasks with URL and action -- [x] browser_get_result - Retrieves results of async browser tasks -- [x] VNC server - stream the dockerized browser to your client - -### VNC - -the dockerfile has a vnc server with a default password of browser-use. connect -to it: +```bash +# 1. Build and install globally +uv build +uv tool uninstall browser-use-mcp-server 2>/dev/null || true +uv tool install dist/browser_use_mcp_server-*.whl +# 2. Run with stdio transport +browser-use-mcp-server run server --port 8000 --stdio --proxy-port 9000 ``` -docker build -t browser-use-mcp-server . -docker run --rm -p8000:8000 -p5900:5900 browser-use-mcp-server -git clone https://github.com/novnc/noVNC -cd noVNC -./utils/novnc_proxy --vnc localhost:5900 -``` - -

-Screenshot 2025-03-24 at 12 03 15 PM -
-Screenshot 2025-03-24 at 12 11 42 PM -

- -### Supported Clients - -- cursor.ai -- claude desktop -- claude code -- windsurf ([windsurf](https://codeium.com/windsurf) doesn't support SSE, only - stdio) -#### SSE Mode +## Client Configuration -After running the server in SSE mode, add http://localhost:8000/sse to your -client UI, or in a mcp.json file: +### SSE Mode ```json { @@ -107,20 +77,7 @@ client UI, or in a mcp.json file: } ``` -#### stdio Mode - -When running in stdio mode, the server will automatically start both the SSE -server and mcp-proxy. The proxy handles the conversion between stdio and SSE -protocols. No additional configuration is needed - just start your client and it -will communicate with the server through stdin/stdout. - -Install the cli - -```bash -uv pip install -e . -``` - -And then e.g., in Windsurf, paste: +### stdio Mode ```json { @@ -135,41 +92,115 @@ And then e.g., in Windsurf, paste: "--stdio", "--proxy-port", "9000" - ] + ], + "env": { + "OPENAI_API_KEY": "your-api-key" + } } } } ``` -### Client Configuration Paths +### Config Locations + +| Client | Configuration Path | +| ---------------- | ----------------------------------------------------------------- | +| Cursor | `./.cursor/mcp.json` | +| Windsurf | `~/.codeium/windsurf/mcp_config.json` | +| Claude (Mac) | `~/Library/Application Support/Claude/claude_desktop_config.json` | +| Claude (Windows) | `%APPDATA%\Claude\claude_desktop_config.json` | + +## Features + +- [x] **Browser Automation**: Control browsers through AI agents +- [x] **Dual Transport**: Support for both SSE and stdio protocols +- [x] **VNC Streaming**: Watch browser automation in real-time +- [x] **Async Tasks**: Execute browser operations asynchronously + +## Local Development + +To develop and test the package locally: + +1. Build a distributable wheel: + + ```bash + # From the project root directory + uv build + ``` + +2. 
Install it as a global tool: + + ```bash + uv tool uninstall browser-use-mcp-server 2>/dev/null || true + uv tool install dist/browser_use_mcp_server-*.whl + ``` -#### Cursor +3. Run from any directory: -- `./.cursor/mcp.json` + ```bash + # Set your OpenAI API key for the current session + export OPENAI_API_KEY=your-api-key-here -#### Windsurf + # Or provide it inline for a one-time run + OPENAI_API_KEY=your-api-key-here browser-use-mcp-server run server --port 8000 --stdio --proxy-port 9000 + ``` -- `~/.codeium/windsurf/mcp_config.json` +4. After making changes, rebuild and reinstall: + ```bash + uv build + uv tool uninstall browser-use-mcp-server + uv tool install dist/browser_use_mcp_server-*.whl + ``` -#### Claude +## Docker -- `~/Library/Application Support/Claude/claude_desktop_config.json` -- `%APPDATA%\Claude\claude_desktop_config.json` +```bash +# Run with default VNC password +docker build -t browser-use-mcp-server . +docker run --rm -p8000:8000 -p5900:5900 browser-use-mcp-server -### Example Usage +# Use custom VNC password +echo "your-password" > vnc_password.txt +docker run --rm -p8000:8000 -p5900:5900 \ + -v $(pwd)/vnc_password.txt:/run/secrets/vnc_password \ + browser-use-mcp-server +``` -Try asking your LLM the following: +### VNC Viewer -`open https://news.ycombinator.com and return the top ranked article` +```bash +# Browser-based viewer +git clone https://github.com/novnc/noVNC +cd noVNC +./utils/novnc_proxy --vnc localhost:5900 +``` + +Default password: `browser-use` + +
+ VNC Screenshot +

+ VNC Screenshot +
+ +## Example + +Try asking your AI: + +``` +open https://news.ycombinator.com and return the top ranked article +``` -### Help +## Support -for issues or interest reach out @ https://cobrowser.xyz +For issues or inquiries: [cobrowser.xyz](https://cobrowser.xyz) -# Stars +## Star History - - - - Star History Chart - +
+ + + + Star History Chart + +
diff --git a/pyproject.toml b/pyproject.toml index 7abfc66..38c1cfb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ "pydantic>=2.10.6", "anyio", "python-dotenv", + "python-json-logger>=2.0.7", "starlette", "uvicorn", "playwright>=1.50.0", @@ -72,7 +73,8 @@ disallow_incomplete_defs = true browser-use-mcp-server = "browser_use_mcp_server.cli:cli" [tool.hatch.build] -packages = ["src/browser_use_mcp_server"] +packages = ["src", "server"] +include = ["server"] [tool.hatch.build.targets.wheel] -packages = ["src/browser_use_mcp_server"] +packages = ["src/browser_use_mcp_server", "server"] diff --git a/server/server.py b/server/server.py index 8bc5541..52eff2c 100644 --- a/server/server.py +++ b/server/server.py @@ -19,10 +19,12 @@ from datetime import datetime from typing import Any, Dict, Optional, Tuple, Union import time +import sys # Third-party imports import click from dotenv import load_dotenv +from pythonjsonlogger import jsonlogger # Browser-use library imports from browser_use import Agent @@ -38,14 +40,50 @@ from langchain_core.language_models import BaseLanguageModel # Configure logging -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) +logger = logging.getLogger() +logger.handlers = [] # Remove any existing handlers +handler = logging.StreamHandler(sys.stderr) +formatter = jsonlogger.JsonFormatter( + '{"time":"%(asctime)s","level":"%(levelname)s","name":"%(name)s","message":"%(message)s"}' +) +handler.setFormatter(formatter) +logger.addHandler(handler) +logger.setLevel(logging.INFO) + +# Ensure uvicorn also logs to stderr in JSON format +uvicorn_logger = logging.getLogger("uvicorn") +uvicorn_logger.handlers = [] +uvicorn_logger.addHandler(handler) + +# Ensure all other loggers use the same format +logging.getLogger("browser_use").addHandler(handler) +logging.getLogger("playwright").addHandler(handler) +logging.getLogger("mcp").addHandler(handler) # Load environment variables load_dotenv() -def 
init_configuration() -> Dict[str, any]: +def parse_bool_env(env_var: str, default: bool = False) -> bool: + """ + Parse a boolean environment variable. + + Args: + env_var: The environment variable name + default: Default value if not set + + Returns: + Boolean value of the environment variable + """ + value = os.environ.get(env_var) + if value is None: + return default + + # Consider various representations of boolean values + return value.lower() in ("true", "yes", "1", "y", "on") + + +def init_configuration() -> Dict[str, Any]: """ Initialize configuration from environment variables with defaults. @@ -79,6 +117,8 @@ def init_configuration() -> Dict[str, any]: "--disable-dev-shm-usage", "--remote-debugging-port=0", # Use random port to avoid conflicts ], + # Patient mode - if true, functions wait for task completion before returning + "PATIENT_MODE": parse_bool_env("PATIENT", False), } return config @@ -164,6 +204,9 @@ async def run_browser_task_async( This function executes a browser automation task with the given URL and action, and updates the task store with progress and results. + When PATIENT_MODE is enabled, the calling function will wait for this function + to complete before returning to the client. + Args: task_id: Unique identifier for the task url: URL to navigate to @@ -383,7 +426,9 @@ async def call_tool( arguments: The arguments to pass to the tool Returns: - A list of content objects to return to the client + A list of content objects to return to the client. + When PATIENT_MODE is enabled, the browser_use tool will wait for the task to complete + and return the full result immediately instead of just the task ID. 
Raises: ValueError: If required arguments are missing @@ -409,7 +454,7 @@ async def call_tool( } # Start task in background - asyncio.create_task( + _task = asyncio.create_task( run_browser_task_async( task_id=task_id, url=arguments["url"], @@ -421,6 +466,38 @@ async def call_tool( ) ) + # If PATIENT is set, wait for the task to complete + if CONFIG["PATIENT_MODE"]: + try: + await _task + # Return the completed task result instead of just the ID + task_data = task_store[task_id] + if task_data["status"] == "failed": + logger.error( + f"Task {task_id} failed: {task_data.get('error', 'Unknown error')}" + ) + return [ + types.TextContent( + type="text", + text=json.dumps(task_data, indent=2), + ) + ] + except Exception as e: + logger.error(f"Error in patient mode execution: {str(e)}") + traceback_str = traceback.format_exc() + # Update task store with error + task_store[task_id]["status"] = "failed" + task_store[task_id]["error"] = str(e) + task_store[task_id]["traceback"] = traceback_str + task_store[task_id]["end_time"] = datetime.now().isoformat() + # Return error information + return [ + types.TextContent( + type="text", + text=json.dumps(task_store[task_id], indent=2), + ) + ] + # Return task ID immediately with explicit sleep instruction return [ types.TextContent( @@ -498,43 +575,85 @@ async def list_tools() -> list[types.Tool]: """ List the available tools for the MCP client. + Returns different tool descriptions based on the PATIENT_MODE configuration. + When PATIENT_MODE is enabled, the browser_use tool description indicates it returns + complete results directly. When disabled, it indicates async operation. 
+ Returns: - A list of tool definitions + A list of tool definitions appropriate for the current configuration """ - return [ - types.Tool( - name="browser_use", - description="Performs a browser action and returns a task ID for async execution", - inputSchema={ - "type": "object", - "required": ["url", "action"], - "properties": { - "url": { - "type": "string", - "description": "URL to navigate to", + patient_mode = CONFIG["PATIENT_MODE"] + + if patient_mode: + return [ + types.Tool( + name="browser_use", + description="Performs a browser action and returns the complete result directly (patient mode active)", + inputSchema={ + "type": "object", + "required": ["url", "action"], + "properties": { + "url": { + "type": "string", + "description": "URL to navigate to", + }, + "action": { + "type": "string", + "description": "Action to perform in the browser", + }, }, - "action": { - "type": "string", - "description": "Action to perform in the browser", + }, + ), + types.Tool( + name="browser_get_result", + description="Gets the result of an asynchronous browser task (not needed in patient mode as browser_use returns complete results directly)", + inputSchema={ + "type": "object", + "required": ["task_id"], + "properties": { + "task_id": { + "type": "string", + "description": "ID of the task to get results for", + } }, }, - }, - ), - types.Tool( - name="browser_get_result", - description="Gets the result of an asynchronous browser task", - inputSchema={ - "type": "object", - "required": ["task_id"], - "properties": { - "task_id": { - "type": "string", - "description": "ID of the task to get results for", - } + ), + ] + else: + return [ + types.Tool( + name="browser_use", + description="Performs a browser action and returns a task ID for async execution", + inputSchema={ + "type": "object", + "required": ["url", "action"], + "properties": { + "url": { + "type": "string", + "description": "URL to navigate to", + }, + "action": { + "type": "string", + "description": "Action 
to perform in the browser", + }, + }, }, - }, - ), - ] + ), + types.Tool( + name="browser_get_result", + description="Gets the result of an asynchronous browser task", + inputSchema={ + "type": "object", + "required": ["task_id"], + "properties": { + "task_id": { + "type": "string", + "description": "ID of the task to get results for", + } + }, + }, + ), + ] @app.list_resources() async def list_resources() -> list[types.Resource]: @@ -745,7 +864,38 @@ async def startup_event(): # Function to run uvicorn in a separate thread def run_uvicorn(): - uvicorn.run(starlette_app, host="0.0.0.0", port=port) + # Configure uvicorn to use JSON logging + log_config = { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "json": { + "()": "pythonjsonlogger.jsonlogger.JsonFormatter", + "fmt": '{"time":"%(asctime)s","level":"%(levelname)s","name":"%(name)s","message":"%(message)s"}', + } + }, + "handlers": { + "default": { + "formatter": "json", + "class": "logging.StreamHandler", + "stream": "ext://sys.stderr", + } + }, + "loggers": { + "": {"handlers": ["default"], "level": "INFO"}, + "uvicorn": {"handlers": ["default"], "level": "INFO"}, + "uvicorn.error": {"handlers": ["default"], "level": "INFO"}, + "uvicorn.access": {"handlers": ["default"], "level": "INFO"}, + }, + } + + uvicorn.run( + starlette_app, + host="0.0.0.0", + port=port, + log_config=log_config, + log_level="info", + ) # If proxy mode is enabled, run both the SSE server and mcp-proxy if stdio: diff --git a/src/browser_use_mcp_server/cli.py b/src/browser_use_mcp_server/cli.py index 5642b2b..da15747 100644 --- a/src/browser_use_mcp_server/cli.py +++ b/src/browser_use_mcp_server/cli.py @@ -7,8 +7,28 @@ import os import sys +import json +import logging import click import importlib.util +from pythonjsonlogger import jsonlogger + +# Configure logging for CLI +logger = logging.getLogger() +logger.handlers = [] # Remove any existing handlers +handler = logging.StreamHandler(sys.stderr) +formatter = 
jsonlogger.JsonFormatter( + '{"time":"%(asctime)s","level":"%(levelname)s","name":"%(name)s","message":"%(message)s"}' +) +handler.setFormatter(formatter) +logger.addHandler(handler) +logger.setLevel(logging.INFO) + + +def log_error(message: str, error: Exception = None): + """Log error in JSON format to stderr""" + error_data = {"error": message, "traceback": str(error) if error else None} + print(json.dumps(error_data), file=sys.stderr) def import_server_module(): @@ -37,6 +57,7 @@ def import_server_module(): spec.loader.exec_module(server_module) return server_module except Exception as e: + log_error("Could not import server module", e) raise ImportError(f"Could not import server module: {e}") raise ImportError( @@ -87,9 +108,7 @@ def run( SUBCOMMAND: should be 'server' """ if subcommand != "server": - click.echo( - f"Unknown subcommand: {subcommand}. Only 'server' is supported.", err=True - ) + log_error(f"Unknown subcommand: {subcommand}. Only 'server' is supported.") sys.exit(1) try: @@ -131,11 +150,7 @@ def run( sys.argv = old_argv except Exception as e: - import traceback - - click.echo(f"Error starting server: {e}", err=True) - click.echo("Detailed error:", err=True) - click.echo(traceback.format_exc(), err=True) + log_error("Error starting server", e) sys.exit(1) diff --git a/uv.lock b/uv.lock index 8d2b758..b6c267e 100644 --- a/uv.lock +++ b/uv.lock @@ -1,4 +1,5 @@ version = 1 +revision = 1 requires-python = ">=3.11, <4.0" resolution-markers = [ "python_full_version >= '3.12.4'", @@ -143,6 +144,7 @@ dependencies = [ { name = "playwright" }, { name = "pydantic" }, { name = "python-dotenv" }, + { name = "python-json-logger" }, { name = "starlette" }, { name = "uvicorn" }, ] @@ -181,9 +183,11 @@ requires-dist = [ { name = "pytest-asyncio", marker = "extra == 'test'", specifier = ">=0.21.0" }, { name = "pytest-cov", marker = "extra == 'test'", specifier = ">=4.1.0" }, { name = "python-dotenv" }, + { name = "python-json-logger", specifier = ">=2.0.7" }, { 
name = "starlette" }, { name = "uvicorn" }, ] +provides-extras = ["dev", "test"] [[package]] name = "certifi" @@ -1034,6 +1038,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863 }, ] +[[package]] +name = "python-json-logger" +version = "3.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9e/de/d3144a0bceede957f961e975f3752760fbe390d57fbe194baf709d8f1f7b/python_json_logger-3.3.0.tar.gz", hash = "sha256:12b7e74b17775e7d565129296105bbe3910842d9d0eb083fc83a6a617aa8df84", size = 16642 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/20/0f2523b9e50a8052bc6a8b732dfc8568abbdc42010aef03a2d750bdab3b2/python_json_logger-3.3.0-py3-none-any.whl", hash = "sha256:dd980fae8cffb24c13caf6e158d3d61c0d6d22342f932cb6e9deedab3d35eec7", size = 15163 }, +] + [[package]] name = "pyyaml" version = "6.0.2"