browser-use · ghost · Jun 10, 2025 · Jun 11, 2025 · Jun 11, 2025 · Jun 12, 2025
diff --git a/.dockerignore b/.dockerignore
@@ -2,4 +2,17 @@ data
 tmp
 results
 
-.env
+.env
+.venv/
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.Python
+*.db
+*.sqlite3
+*.log
+*.mp4
+*.avi
+*.mkv
+*.webm
diff --git a/.gitignore b/.gitignore
@@ -189,4 +189,11 @@ data/
 .config.pkl
 *.pdf
 
-workflow
+workflow.env
+.venv/
+
+
+
+# SSH keys, public and private
+"eval \"$(ssh-agent -s)\""
+"eval \"$(ssh-agent -s)\".pub"
diff --git a/Dockerfile b/Dockerfile
@@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y \
     curl \
     unzip \
     xvfb \
-    libgconf-2-4 \
+   # libgconf-2-4 \
     libxss1 \
     libnss3 \
     libnspr4 \
@@ -44,6 +44,16 @@ RUN apt-get update && apt-get install -y \
     fonts-dejavu-core \
     fonts-dejavu-extra \
     vim \
+    # Video recording dependencies
+    ffmpeg \
+    libavcodec-extra \
+    libavformat-dev \
+    libavutil-dev \
+    libswscale-dev \
+    libx264-dev \
+    libx265-dev \
+    libvpx-dev \
+    libwebp-dev \
     && rm -rf /var/lib/apt/lists/*
 
 # Install noVNC
@@ -65,6 +75,9 @@ RUN node -v && npm -v && npx -v
 # Set up working directory
 WORKDIR /app
 
+# Add src directory to Python path for imports
+ENV PYTHONPATH=/app/src:/app
+
 # Copy requirements and install Python dependencies
 COPY requirements.txt .
 
@@ -83,7 +96,7 @@ RUN mkdir -p $PLAYWRIGHT_BROWSERS_PATH
 # RUN playwright install chrome --with-deps
 
 # Alternative: Install Chromium if Google Chrome is problematic in certain environments
-RUN playwright install chromium --with-deps
+RUN playwright install chromium 
 
 
 # Copy the application code
@@ -96,4 +109,4 @@ COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
 EXPOSE 7788 6080 5901 9222
 
 CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
-#CMD ["/bin/bash"]
+#CMD ["/bin/bash"]
diff --git a/LIVE_BROWSER_README.md b/LIVE_BROWSER_README.md
@@ -0,0 +1,164 @@
+# 🧪 Live Browser Testing Agent
+
+This system now supports **real-time browser automation viewing** directly in your frontend! Instead of watching video recordings after the fact, you can see the browser automation happening live as it occurs.
+
+## 🎯 What's New
+
+### ✅ **Live Browser Viewing**
+- **Real-time automation** visible in your frontend
+- **Live mouse movements** and clicks
+- **Page navigation** and form filling
+- **Immediate feedback** when agent starts working
+- **Final browser state** remains visible after completion
+
+### ✅ **Two Interface Options**
+1. **Simple HTML Frontend** (`static/index.html`) - Clean, focused interface
+2. **Gradio WebUI** (`http://localhost:7788`) - Full-featured interface with live VNC
+
+## 🚀 Quick Start
+
+### Option 1: Automated Setup (Recommended)
+```bash
+# Run the startup script
+python start_live_browser.py
+```
+
+### Option 2: Manual Setup
+```bash
+# Start Docker with VNC support
+docker compose up --build
+
+# Wait for services to start (about 30 seconds)
+# Then access your interface
+```
+
+## 📱 Access Your Applications
+
+| Service | URL | Purpose |
+|---------|-----|---------|
+| **Simple Frontend** | `http://localhost:7788` | Clean HTML interface |
+| **Gradio WebUI** | `http://localhost:7788` | Full-featured interface |
+| **VNC Viewer** | `http://localhost:6080/vnc.html` | Direct VNC access |
+| **VNC Password** | `youvncpassword` | Default password |
+
+## 🎨 How It Works
+
+### **Before Agent Runs:**
+- Clean browser window (empty or showing your app)
+- Status indicator showing "Ready"
+
+### **During Agent Execution:**
+- **Real-time browser automation** happening right in the UI
+- **Live mouse movements** and clicks
+- **Page navigation** and form filling
+- **Screenshot updates** as the agent works
+
+### **After Agent Completes:**
+- Final state of the browser
+- Results visible in the browser
+- Status showing "Completed"
+
+## 🔧 Technical Details
+
+### **VNC Architecture**
+```
+User Frontend → VNC Viewer (port 6080) → VNC Server (port 5901) → Virtual Display (:99) → Browser
+```
+
+### **Components**
+- **Xvfb**: Virtual display server (`:99`)
+- **x11vnc**: VNC server sharing the virtual display
+- **noVNC**: Web-based VNC client
+- **Supervisor**: Manages all services
+
+### **Browser Configuration**
+- **Headless**: `False` (browser visible for VNC)
+- **Window Size**: 1280x1100
+- **Display**: `:99` (virtual display)
+
+## 🛠️ Troubleshooting
+
+### **VNC Not Showing**
+1. Check if Docker is running: `docker ps`
+2. Verify VNC service: `docker logs <container_name>`
+3. Check ports: `netstat -an | grep 6080`
+
+### **Browser Not Visible**
+1. Ensure `headless=False` in browser config
+2. Check if virtual display is working
+3. Verify VNC connection
+
+### **Performance Issues**
+1. Reduce VNC quality settings
+2. Increase Docker memory allocation
+3. Close unnecessary browser tabs
+
+## 🔒 Security Notes
+
+- **VNC Password**: Change default password in `.env` file
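+- **Network Access**: `docker-compose.yml` publishes the VNC ports (6080, 5901) without a host IP, so they are reachable on all host interfaces; bind them to `127.0.0.1` (e.g. `"127.0.0.1:6080:6080"`) or firewall them if the host is reachable from other machines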
+- **Browser Isolation**: Each session runs in isolated container
+
+## 📝 Configuration
+
+### **Environment Variables**
+```bash
+# VNC Settings
+VNC_PASSWORD=your_custom_password
+RESOLUTION=1920x1080x24
+
+# Browser Settings
+DISPLAY=:99
+USE_OWN_BROWSER=false
+KEEP_BROWSER_OPEN=true
+```
+
+### **Custom VNC Settings**
+Edit `supervisord.conf` to modify:
+- VNC port (default: 5901)
+- Display resolution
+- Authentication settings
+
+## 🎯 Usage Examples
+
+### **Simple Test**
+1. Open `http://localhost:7788`
+2. Enter query: "Click the login button"
+3. Enter URL: "https://example.com"
+4. Click "Start Live Test"
+5. Watch the browser automation happen live!
+
+### **Complex Workflow**
+1. Start with simple navigation
+2. Watch form filling in real-time
+3. See error handling and retries
+4. Observe final state and results
+
+## 🚀 Advanced Features
+
+### **Multiple Browser Sessions**
+- Each test runs in isolated browser context
+- No interference between concurrent tests
+- Clean state for each automation
+
+### **Debugging Support**
+- Live view helps identify automation issues
+- Real-time feedback on agent decisions
+- Visual confirmation of actions
+
+### **Integration Options**
+- Embed VNC viewer in any web application
+- Customize VNC viewer appearance
+- Add status indicators and controls
+
+## 📞 Support
+
+If you encounter issues:
+1. Check the Docker logs: `docker compose logs`
+2. Verify all services are running
+3. Ensure ports are not blocked
+4. Check browser console for errors
+
+---
+
+**🎉 Enjoy your live browser automation experience!** 
diff --git a/changes.txt b/changes.txt
@@ -0,0 +1,40 @@
+browser-use-agent-tab.py
+-------------------------
+- Removed all extra/unused variables.
+- Rewrote run_agent_task() without using gradio, webui_manager, or other UI dependencies.
+- Commented out all unused functions:
+    - pause_button
+    - resume_button
+    - _ask_assistant_callback
+    - handle_done
+    - handle_new_step
+    - _get_config_value
+    - _format_agent_output
+- Created a FastAPI endpoint to run main-agent-task.
+- Created a "static/" folder containing a simple UI for "Website Testing Agent".
+
+browser_recorder.py
+--------------------
+- Cleans the entire video directory before starting a new recording session.
+- Uses glob.glob() to recursively find .webm files after context closure.
+- Does not use page.on() or page.video.start(); relies on Playwright's built-in recording mechanism.
+- Stores just the filenames in a list: self.recorded_videos.
+
+agent/mainagent.py
+-------------------
+- Modified loop logic to ensure the agent runs correctly only when required.
+
+agent/qa_possibility_checker/
+------------------------------
+- Updated prompt.py with refined prompt structure.
+- Added custom_validate() function in output.py for validating the agent output.
+
+agent/intent_classifier/
+------------------------------
+- Added custom_validate() function in output.py for validating the agent output.
+
+agent/prompt_enhancer/
+------------------------------
+- Added custom_validate() function in output.py for validating the agent output.
+
+
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -12,6 +12,7 @@ services:
       - "6080:6080"
       - "5901:5901"
       - "9222:9222"
+      - "8000:8000"
     environment:
       # LLM API Keys & Endpoints
       - OPENAI_ENDPOINT=${OPENAI_ENDPOINT:-https://api.openai.com/v1}
@@ -60,13 +61,23 @@ services:
       - RESOLUTION=${RESOLUTION:-1920x1080x24}
       - RESOLUTION_WIDTH=${RESOLUTION_WIDTH:-1920}
       - RESOLUTION_HEIGHT=${RESOLUTION_HEIGHT:-1080}
-
+      - MONGO_URI=${MONGO_URI:-} # no built-in default: the connection string carries credentials, so supply it via .env
+      - MONGODB_URI=${MONGODB_URI:-} # alternate variable name for the connection string; also supplied via .env
+      - MONGO_DB_NAME=${MONGO_DB_NAME:-nextsqa_db}
+      - DB_NAME=${DB_NAME:-nextsqa_db}
       # VNC Settings
       - VNC_PASSWORD=${VNC_PASSWORD:-youvncpassword}
+
+      # Python Path Settings
+      - PYTHONPATH=/app/src
 
     volumes:
       - /tmp/.X11-unix:/tmp/.X11-unix
       # - ./my_chrome_data:/app/data/chrome_data # Optional: persist browser data
+      # Mount output directory for saving screenshots, videos, and agent data
+      - ./src/outputdata:/app/src/outputdata
+      # Mount the single root .env so load_dotenv can read it inside the container
+      - ./.env:/app/.env:ro
     restart: unless-stopped
     shm_size: '2gb'
     cap_add:

diff --git a/requirements.txt b/requirements.txt
@@ -8,3 +8,21 @@ langchain-ibm==0.3.10
 langchain_mcp_adapters==0.0.9
 langgraph==0.3.34
 langchain-community
+langchain-ollama
+# FastAPI and web framework
+fastapi
+uvicorn[standard]  # Includes websockets library automatically
+pydantic==2.10.6
+fastapi-mail==1.4.1
+# Database and authentication
+motor==3.4.0
+passlib[bcrypt]==1.7.4
+python-jose==3.3.0
+email-validator==2.1.1
+pymongo==4.5.0
+# Additional dependencies that might be needed
+python-multipart
+httpx
+requests
+pillow
+python-dotenv
diff --git a/src/API/Ai_Testing/models.py b/src/API/Ai_Testing/models.py
@@ -0,0 +1,31 @@
+from pydantic import BaseModel, Field
+from datetime import datetime, timezone
+from typing import Optional
+
+
+class ActionsModel(BaseModel):  # Wraps a single agent action as a free-form dict.
+    action: dict = Field(..., description="The action taken by the agent")  # required; the dict's keys and values are not constrained here
+
+
+class StepsModel(BaseModel):  # One step of an agent run together with the actions recorded for it.
+    step: str = Field(..., description="The step taken by the agent")
+    step_no: int = Field(..., description="The step number in the sequence")
+    action: list[ActionsModel] = Field(..., description="The action taken in this step")  # a list, despite the singular field name
+    created_at: Optional[datetime] = Field(..., description="Timestamp of when the step was created")  # `...` keeps the field required; None is accepted only as an explicit value
+    updated_at: Optional[datetime] = Field(..., description="Timestamp of the last update")  # required-but-nullable, same as created_at
+
+
+class ResultModel(BaseModel):  # Outcome of an agent run: the final result text plus the recorded steps.
+
+    final_result: str = Field(..., description="The final result of the agent's task")
+    steps: list[StepsModel] = Field(..., description="List of steps taken by the agent")  # required; each entry is validated as a StepsModel
+
+
+
+class AgentModel(BaseModel):  # Top-level record of one agent run: the request, its result, the owning user and timestamps.
+    query: str = Field(..., description="The query to run the agent on")
+    url: str = Field(..., description="The URL to run the agent on")  # plain str; no URL validation is applied by this model
+    result: ResultModel = Field(..., description="The result of the agent's task")
+    user_id: str = Field(..., description="The ID of the user who initiated the task")
+    created_at: datetime = Field(..., description="Timestamp of when the agent was created")  # NOTE(review): non-Optional here, unlike StepsModel's timestamps; confirm the difference is intended
+    updated_at: datetime = Field(..., description="Timestamp of the last update")
diff --git a/src/API/Ai_Testing/routes.py b/src/API/Ai_Testing/routes.py
@@ -0,0 +1,10 @@
+from fastapi import APIRouter
+from .schemas import AgentRequest
+from .services import run_agent_work
+
+router = APIRouter()
+
+@router.post("/run-agent")
+async def run_agent(request: AgentRequest):    # Forwards the request's query and url to run_agent_work and returns its awaited result.
+    return await run_agent_work(request.query, request.url, {"sub": "[email protected]"})  # NOTE(review): the third argument is a hard-coded literal, not derived from the request; presumably a placeholder user payload, confirm before relying on it
+
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,4 +2,17 @@ data @@
     tmp
     results
-    .env
+    .env
+    .venv/
+    __pycache__/
+    *.pyc
+    *.pyo
+    *.pyd
+    .Python
+    *.db
+    *.sqlite3
+    *.log
+    *.mp4
+    *.avi
+    *.mkv
+    *.webm