browser-use · ghost · Jun 10, 2025 · Jun 11, 2025 · Jun 11, 2025 · Jun 12, 2025
diff --git a/.dockerignore b/.dockerignore
@@ -2,4 +2,17 @@ data
 tmp
 results
 
-.env
+.env
+.venv/
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.Python
+*.db
+*.sqlite3
+*.log
+*.mp4
+*.avi
+*.mkv
+*.webm
diff --git a/.gitignore b/.gitignore
@@ -189,4 +189,11 @@ data/
 .config.pkl
 *.pdf
 
-workflow
+workflow.env
+.venv/
+
+
+
+# SSH keys (public and private)
+"eval \"$(ssh-agent -s)\""
+"eval \"$(ssh-agent -s)\".pub"
diff --git a/Dockerfile b/Dockerfile
@@ -44,6 +44,16 @@ RUN apt-get update && apt-get install -y \
     fonts-dejavu-core \
     fonts-dejavu-extra \
     vim \
+    # Video recording dependencies
+    ffmpeg \
+    libavcodec-extra \
+    libavformat-dev \
+    libavutil-dev \
+    libswscale-dev \
+    libx264-dev \
+    libx265-dev \
+    libvpx-dev \
+    libwebp-dev \
     && rm -rf /var/lib/apt/lists/*
 
 # Install noVNC
@@ -65,6 +75,9 @@ RUN node -v && npm -v && npx -v
 # Set up working directory
 WORKDIR /app
 
+# Add src directory to Python path for imports
+ENV PYTHONPATH=/app/src:/app
+
 # Copy requirements and install Python dependencies
 COPY requirements.txt .
 

diff --git a/LIVE_BROWSER_README.md b/LIVE_BROWSER_README.md
@@ -0,0 +1,164 @@
+# 🧪 Live Browser Testing Agent
+
+This system now supports **real-time browser automation viewing** directly in your frontend! Instead of watching video recordings after the fact, you can see the browser automation happening live as it occurs.
+
+## 🎯 What's New
+
+### ✅ **Live Browser Viewing**
+- **Real-time automation** visible in your frontend
+- **Live mouse movements** and clicks
+- **Page navigation** and form filling
+- **Immediate feedback** when agent starts working
+- **Final browser state** remains visible after completion
+
+### ✅ **Two Interface Options**
+1. **Simple HTML Frontend** (`static/index.html`) - Clean, focused interface
+2. **Gradio WebUI** (`http://localhost:7788`) - Full-featured interface with live VNC
+
+## 🚀 Quick Start
+
+### Option 1: Automated Setup (Recommended)
+```bash
+# Run the startup script
+python start_live_browser.py
+```
+
+### Option 2: Manual Setup
+```bash
+# Start Docker with VNC support
+docker compose up --build
+
+# Wait for services to start (about 30 seconds)
+# Then access your interface
+```
+
+## 📱 Access Your Applications
+
+| Service | URL | Purpose |
+|---------|-----|---------|
+| **Simple Frontend** | `http://localhost:7788` | Clean HTML interface |
+| **Gradio WebUI** | `http://localhost:7788` | Full-featured interface |
+| **VNC Viewer** | `http://localhost:6080/vnc.html` | Direct VNC access |
+| **VNC Password** | `youvncpassword` | Default password |
+
+## 🎨 How It Works
+
+### **Before Agent Runs:**
+- Clean browser window (empty or showing your app)
+- Status indicator showing "Ready"
+
+### **During Agent Execution:**
+- **Real-time browser automation** happening right in the UI
+- **Live mouse movements** and clicks
+- **Page navigation** and form filling
+- **Screenshot updates** as the agent works
+
+### **After Agent Completes:**
+- Final state of the browser
+- Results visible in the browser
+- Status showing "Completed"
+
+## 🔧 Technical Details
+
+### **VNC Architecture**
+```
+User Frontend → VNC Viewer (port 6080) → VNC Server (port 5901) → Virtual Display (:99) → Browser
+```
+
+### **Components**
+- **Xvfb**: Virtual display server (`:99`)
+- **x11vnc**: VNC server sharing the virtual display
+- **noVNC**: Web-based VNC client
+- **Supervisor**: Manages all services
+
+### **Browser Configuration**
+- **Headless**: `False` (browser visible for VNC)
+- **Window Size**: 1280x1100
+- **Display**: `:99` (virtual display)
+
+## 🛠️ Troubleshooting
+
+### **VNC Not Showing**
+1. Check if Docker is running: `docker ps`
+2. Verify VNC service: `docker logs <container_name>`
+3. Check ports: `netstat -an | grep 6080`
+
+### **Browser Not Visible**
+1. Ensure `headless=False` in browser config
+2. Check if virtual display is working
+3. Verify VNC connection
+
+### **Performance Issues**
+1. Reduce VNC quality settings
+2. Increase Docker memory allocation
+3. Close unnecessary browser tabs
+
+## 🔒 Security Notes
+
+- **VNC Password**: Change the default password in the `.env` file
+- **Network Access**: VNC is only accessible on localhost by default
+- **Browser Isolation**: Each session runs in isolated container
+
+## 📝 Configuration
+
+### **Environment Variables**
+```bash
+# VNC Settings
+VNC_PASSWORD=your_custom_password
+RESOLUTION=1920x1080x24
+
+# Browser Settings
+DISPLAY=:99
+USE_OWN_BROWSER=false
+KEEP_BROWSER_OPEN=true
+```
+
+### **Custom VNC Settings**
+Edit `supervisord.conf` to modify:
+- VNC port (default: 5901)
+- Display resolution
+- Authentication settings
+
+## 🎯 Usage Examples
+
+### **Simple Test**
+1. Open `http://localhost:7788`
+2. Enter query: "Click the login button"
+3. Enter URL: "https://example.com"
+4. Click "Start Live Test"
+5. Watch the browser automation happen live!
+
+### **Complex Workflow**
+1. Start with simple navigation
+2. Watch form filling in real-time
+3. See error handling and retries
+4. Observe final state and results
+
+## 🚀 Advanced Features
+
+### **Multiple Browser Sessions**
+- Each test runs in isolated browser context
+- No interference between concurrent tests
+- Clean state for each automation
+
+### **Debugging Support**
+- Live view helps identify automation issues
+- Real-time feedback on agent decisions
+- Visual confirmation of actions
+
+### **Integration Options**
+- Embed VNC viewer in any web application
+- Customize VNC viewer appearance
+- Add status indicators and controls
+
+## 📞 Support
+
+If you encounter issues:
+1. Check the Docker logs: `docker compose logs`
+2. Verify all services are running
+3. Ensure ports are not blocked
+4. Check browser console for errors
+
+---
+
+**🎉 Enjoy your live browser automation experience!** 
diff --git a/changes.txt b/changes.txt
@@ -0,0 +1,40 @@
+browser-use-agent-tab.py
+-------------------------
+- Removed all extra/unused variables.
+- Rewrote run_agent_task() without using gradio, webui_manager, or other UI dependencies.
+- Commented out all unused functions:
+    - pause_button
+    - resume_button
+    - _ask_assistant_callback
+    - handle_done
+    - handle_new_step
+    - _get_config_value
+    - _format_agent_output
+- Created a FastAPI endpoint to run main-agent-task.
+- Created a "static/" folder containing a simple UI for "Website Testing Agent".
+
+browser_recorder.py
+--------------------
+- Cleans the entire video directory before starting a new recording session.
+- Uses glob.glob() to recursively find .webm files after context closure.
+- Does not use page.on() or page.video.start(); relies on Playwright's built-in recording mechanism.
+- Stores just the filenames in a list: self.recorded_videos.
+
+agent/mainagent.py
+-------------------
+- Modified loop logic to ensure the agent runs correctly only when required.
+
+agent/qa_possibility_checker/
+------------------------------
+- Updated prompt.py with refined prompt structure.
+- Added custom_validate() function in output.py for validating the agent output.
+
+agent/intent_classifier/
+------------------------------
+- Added custom_validate() function in output.py for validating the agent output.
+
+agent/prompt_enhancer/
+------------------------------
+- Added custom_validate() function in output.py for validating the agent output.
+
+
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -63,10 +63,15 @@ services:
 
       # VNC Settings
       - VNC_PASSWORD=${VNC_PASSWORD:-youvncpassword}
+
+      # Python Path Settings
+      # Matches the Dockerfile's ENV PYTHONPATH: /app/src for in-package imports,
+      # /app so that absolute `src.*` imports (e.g. in src/API/main.py) resolve.
+      - PYTHONPATH=/app/src:/app
 
     volumes:
       - /tmp/.X11-unix:/tmp/.X11-unix
       # - ./my_chrome_data:/app/data/chrome_data # Optional: persist browser data
+      # Mount output directory for saving screenshots, videos, and agent data
+      - ./src/outputdata:/app/src/outputdata
     restart: unless-stopped
     shm_size: '2gb'
     cap_add:

diff --git a/requirements.txt b/requirements.txt
@@ -8,3 +8,6 @@ langchain-ibm==0.3.10
 langchain_mcp_adapters==0.0.9
 langgraph==0.3.34
 langchain-community
+langchain-ollama
+fastapi
+uvicorn[standard]
diff --git a/src/API/main.py b/src/API/main.py
@@ -0,0 +1,77 @@
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect, HTTPException
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from pathlib import Path
+import os
+
+from src.webui.components.browser_use_agent_tab import run_agent_task
+from src.websocket.websocket_manager import WebSocketManager
+
+# Application object plus permissive CORS: any origin, method and header.
+# NOTE(review): the FastAPI CORS docs advise against wildcard origins together
+# with allow_credentials=True — confirm whether credentials are needed, or
+# list the allowed origins explicitly.
+app = FastAPI()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Shared WebSocket manager used by /ws and by the /run-agent message callback.
+manager = WebSocketManager()
+
+# Mount static files: serves the process's current working directory under /static.
+# NOTE(review): this makes every file in the working directory downloadable
+# (potentially including .env) — consider pointing at a dedicated static folder.
+app.mount("/static", StaticFiles(directory=os.getcwd()), name="static")
+
+# Default DISPLAY to ":99" when the environment does not already define it.
+if not os.getenv("DISPLAY"):
+    os.environ["DISPLAY"] = ":99"
+
+
+# 🧠 Request body model
+class AgentRequest(BaseModel):
+    """JSON body accepted by POST /run-agent.
+
+    Both fields are passed positionally to ``run_agent_task`` (query first,
+    then url).
+    """
+
+    query: str
+    url: str
+
+
+# 🎯 Run agent task and send logs via WebSocket
+@app.post("/run-agent")
+
+async def run_agent(request: AgentRequest):
+    try:
+        print(f"🔄 Starting agent with DISPLAY={os.getenv('DISPLAY')}")
+
+        async def message_callback(message: str):
+            await manager.send_message(message)
+
+        result = await run_agent_task(request.query, request.url, message_callback=message_callback)
+
+        return {
+            "status": "success",
+            "task_id": result["task_id"],
+            "final_result": result["final_result"]
+        }
+    except Exception as e:
+        print(f"❌ Agent error: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# ⛔ Optional Stop Agent
+@app.post("/stop-agent")
+def stop_agent():
+    """Return a fixed ``{"status": "stopped"}`` payload.
+
+    Placeholder: the handler body performs no cancellation itself; it only
+    returns the constant response.
+    """
+    return {"status": "stopped"}
+
+
+# 🌐 Serve frontend
+@app.get("/")
+async def serve_frontend():
+    """Serve the frontend page from ``static/index.html``.
+
+    The path is relative, so it is resolved against the process's current
+    working directory.
+    """
+    return FileResponse("static/index.html")
+
+
+@app.websocket("/ws")
+async def websocket_endpoint(websocket: WebSocket):
+    await manager.connect(websocket)
+    try:
+        while True:
+            await websocket.receive_text()  # keep alive
+    except:
+        manager.disconnect(websocket)
diff --git a/src/agent/browser_use/browser_use_agent.py b/src/agent/browser_use/browser_use_agent.py
@@ -26,7 +26,6 @@
         os.environ.get("SKIP_LLM_API_KEY_VERIFICATION", "false").lower()[0] in "ty1"
 )
 
-
 class BrowserUseAgent(Agent):
     def _set_tool_calling_method(self) -> ToolCallingMethod | None:
         tool_calling_method = self.settings.tool_calling_method
@@ -129,6 +128,7 @@ async def run(
                 )
 
                 logger.info(f'❌ {error_message}')
+
 
             return self.state.history
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,4 +2,17 @@ data @@
     tmp
     results
-    .env
+    .env
+    .venv/
+    __pycache__/
+    *.pyc
+    *.pyo
+    *.pyd
+    .Python
+    *.db
+    *.sqlite3
+    *.log
+    *.mp4
+    *.avi
+    *.mkv
+    *.webm