From 36e64cfa5b0561f0257484f3079d71a3c8b8e721 Mon Sep 17 00:00:00 2001 From: Samel Yossef <168319629+Copyxyzai@users.noreply.github.com> Date: Mon, 22 Sep 2025 15:08:31 +0000 Subject: [PATCH] Checkpoint from VS Code for coding agent session --- .env.example | 71 ------------------------ .github/copilot-instructions.md | 97 +++++++++++++++++++++++++++++++++ .vscode/settings.json | 4 +- 3 files changed, 100 insertions(+), 72 deletions(-) delete mode 100644 .env.example create mode 100644 .github/copilot-instructions.md diff --git a/.env.example b/.env.example deleted file mode 100644 index 000f11c4..00000000 --- a/.env.example +++ /dev/null @@ -1,71 +0,0 @@ -OPENAI_ENDPOINT=https://api.openai.com/v1 -OPENAI_API_KEY= - -ANTHROPIC_API_KEY= -ANTHROPIC_ENDPOINT=https://api.anthropic.com - -GOOGLE_API_KEY= - -AZURE_OPENAI_ENDPOINT= -AZURE_OPENAI_API_KEY= -AZURE_OPENAI_API_VERSION=2025-01-01-preview - -DEEPSEEK_ENDPOINT=https://api.deepseek.com -DEEPSEEK_API_KEY= - -MISTRAL_API_KEY= -MISTRAL_ENDPOINT=https://api.mistral.ai/v1 - -OLLAMA_ENDPOINT=http://localhost:11434 - -ALIBABA_ENDPOINT=https://dashscope.aliyuncs.com/compatible-mode/v1 -ALIBABA_API_KEY= - -MODELSCOPE_ENDPOINT=https://api-inference.modelscope.cn/v1 -MODELSCOPE_API_KEY= - -MOONSHOT_ENDPOINT=https://api.moonshot.cn/v1 -MOONSHOT_API_KEY= - -UNBOUND_ENDPOINT=https://api.getunbound.ai -UNBOUND_API_KEY= - -SiliconFLOW_ENDPOINT=https://api.siliconflow.cn/v1/ -SiliconFLOW_API_KEY= - -IBM_ENDPOINT=https://us-south.ml.cloud.ibm.com -IBM_API_KEY= -IBM_PROJECT_ID= - -GROK_ENDPOINT="https://api.x.ai/v1" -GROK_API_KEY= - -#set default LLM -DEFAULT_LLM=openai - - -# Set to false to disable anonymized telemetry -ANONYMIZED_TELEMETRY=false - -# LogLevel: Set to debug to enable verbose logging, set to result to get results only. 
Available: result | debug | info -BROWSER_USE_LOGGING_LEVEL=info - -# Browser settings -BROWSER_PATH= -BROWSER_USER_DATA= -BROWSER_DEBUGGING_PORT=9222 -BROWSER_DEBUGGING_HOST=localhost -# Set to true to keep browser open between AI tasks -KEEP_BROWSER_OPEN=true -USE_OWN_BROWSER=false -BROWSER_CDP= -# Display settings -# Format: WIDTHxHEIGHTxDEPTH -RESOLUTION=1920x1080x24 -# Width in pixels -RESOLUTION_WIDTH=1920 -# Height in pixels -RESOLUTION_HEIGHT=1080 - -# VNC settings -VNC_PASSWORD=youvncpassword diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 00000000..4c63e5ac --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,97 @@ +# Browser Use Web UI Development Guide + +## Project Overview +Browser Use Web UI is a Gradio-based web interface for browser automation agents built on the browser-use library (https://github.com/browser-use/browser-use). The architecture follows a modular design with clear separation between browser control, agent logic, and UI components. 
+ +## Core Architecture + +### Key Components +- **`src/agent/`**: Agent implementations (`BrowserUseAgent`, `DeepResearchAgent`) +- **`src/browser/`**: Custom browser and context wrappers extending browser-use +- **`src/controller/`**: Custom action controller with MCP tool integration +- **`src/webui/`**: Gradio interface with modular tab components +- **`src/utils/`**: Configuration, LLM providers, and utilities + +### Critical Files +- `webui.py`: Main entry point with CLI args for IP/port/theme +- `src/webui/interface.py`: UI assembly with tab-based layout +- `src/controller/custom_controller.py`: Action registry with custom actions +- `src/utils/config.py`: LLM provider definitions and model mappings + +## Development Patterns + +### LLM Provider Integration +Follow the pattern in `src/utils/config.py` for new providers: +```python +PROVIDER_DISPLAY_NAMES = {"provider_key": "Display Name"} +model_names = {"provider_key": ["model1", "model2"]} +``` + +### Custom Actions +Register new browser actions in `CustomController._register_custom_actions()`: +```python +@self.registry.action("Description of when to use this action") +async def action_name(param: str, browser: BrowserContext): + # Implementation + return ActionResult(extracted_content=result, include_in_memory=True) +``` + +### UI Components +Create new tabs as separate files in `src/webui/components/` following the pattern: +```python +def create_tab_name_tab(ui_manager: WebuiManager): + with gr.Column(): + # Gradio components + pass +``` + +### Agent Implementations +Extend `BrowserUseAgent` or create new agents inheriting from browser-use's `Agent` class. 
Key patterns: +- Override `_set_tool_calling_method()` for model-specific configurations +- Use `@time_execution_async("--run (agent)")` for performance tracking +- Implement proper cleanup in `finally` blocks + +## Configuration & Environment + +### Environment Variables +- LLM API keys: `{PROVIDER}_API_KEY`, `{PROVIDER}_ENDPOINT` +- Browser: `BROWSER_PATH`, `BROWSER_USER_DATA`, `BROWSER_DEBUGGING_PORT` +- Logging: `BROWSER_USE_LOGGING_LEVEL`, `ANONYMIZED_TELEMETRY` + +### Docker Development +- Uses supervisord for multi-service orchestration (VNC, noVNC, WebUI) +- VNC available at `:6080/vnc.html` for browser observation +- Playwright browsers installed to `/ms-browsers` + +## Testing & Running + +### Local Development +```bash +python webui.py --ip 127.0.0.1 --port 7788 --theme Ocean +``` + +### Test Structure +Tests in `tests/` demonstrate usage patterns: +- `test_agents.py`: Complete agent workflows with LLM/browser setup +- MCP server integration examples with desktop-commander + +### Docker Deployment +```bash +docker compose up --build +# For ARM64: TARGETPLATFORM=linux/arm64 docker compose up --build +``` + +## MCP Integration +Model Context Protocol (MCP) tools are dynamically registered via `setup_mcp_client()`. Tools are registered under names of the form `mcp.{server_name}.{tool_name}` and integrated into the action registry. 
+ +## Browser Configuration +- Custom browser extends browser-use's Browser class +- Supports persistent sessions via `BROWSER_USER_DATA` +- Window dimensions configurable via `window_w`, `window_h` variables +- Use `use_own_browser=True` for existing Chrome profiles + +## Key Dependencies +- `browser-use==0.1.48`: Core browser automation +- `gradio==5.27.0`: Web UI framework +- `langchain-*`: LLM provider adapters +- `playwright`: Browser automation backend \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 8b09300d..9fc31550 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -7,5 +7,7 @@ "source.fixAll.ruff": "explicit", "source.organizeImports.ruff": "explicit" } - } + }, + "python-envs.defaultEnvManager": "ms-python.python:system", + "python-envs.pythonProjects": [] }