Skip to content

Commit 3e7551c

Browse files
authored
Merge branch 'browser-use:main' into feat/qwen-support
2 parents 1eb4b30 + 84b8965 commit 3e7551c

File tree

9 files changed

+160
-33
lines changed

9 files changed

+160
-33
lines changed

.env.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ GOOGLE_API_KEY=
88

99
AZURE_OPENAI_ENDPOINT=
1010
AZURE_OPENAI_API_KEY=
11+
AZURE_OPENAI_API_VERSION=2025-01-01-preview
1112

1213
DEEPSEEK_ENDPOINT=https://api.deepseek.com
1314
DEEPSEEK_API_KEY=

Dockerfile

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,8 @@ RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
4747
&& git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
4848
&& ln -s /opt/novnc/vnc.html /opt/novnc/index.html
4949

50-
# Install Chrome
51-
RUN curl -fsSL https://dl.google.com/linux/linux_signing_key.pub | gpg --dearmor -o /usr/share/keyrings/google-chrome.gpg \
52-
&& echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome.gpg] http://dl.google.com/linux/chrome/deb/ stable main" | tee /etc/apt/sources.list.d/google-chrome.list
50+
# Target platform build argument (defaults to AMD64; override it for ARM64 builds)
51+
ARG TARGETPLATFORM=linux/amd64
5352

5453
# Set up working directory
5554
WORKDIR /app
@@ -62,15 +61,14 @@ RUN pip install --no-cache-dir -r requirements.txt
6261
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
6362
RUN playwright install --with-deps chromium
6463
RUN playwright install-deps
65-
RUN apt-get install -y google-chrome-stable
6664

6765
# Copy the application code
6866
COPY . .
6967

7068
# Set environment variables
7169
ENV PYTHONUNBUFFERED=1
7270
ENV BROWSER_USE_LOGGING_LEVEL=info
73-
ENV CHROME_PATH=/usr/bin/google-chrome
71+
ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
7472
ENV ANONYMIZED_TELEMETRY=false
7573
ENV DISPLAY=:99
7674
ENV RESOLUTION=1920x1080x24
@@ -83,6 +81,6 @@ ENV RESOLUTION_HEIGHT=1080
8381
RUN mkdir -p /var/log/supervisor
8482
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
8583

86-
EXPOSE 7788 6080 5900
84+
EXPOSE 7788 6080 5901
8785

8886
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]

Dockerfile.arm64

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
FROM python:3.11-slim
2+
3+
# Install system dependencies
4+
RUN apt-get update && apt-get install -y \
5+
wget \
6+
gnupg \
7+
curl \
8+
unzip \
9+
xvfb \
10+
libgconf-2-4 \
11+
libxss1 \
12+
libnss3 \
13+
libnspr4 \
14+
libasound2 \
15+
libatk1.0-0 \
16+
libatk-bridge2.0-0 \
17+
libcups2 \
18+
libdbus-1-3 \
19+
libdrm2 \
20+
libgbm1 \
21+
libgtk-3-0 \
22+
libxcomposite1 \
23+
libxdamage1 \
24+
libxfixes3 \
25+
libxrandr2 \
26+
xdg-utils \
27+
fonts-liberation \
28+
dbus \
29+
xauth \
30+
xvfb \
31+
x11vnc \
32+
tigervnc-tools \
33+
supervisor \
34+
net-tools \
35+
procps \
36+
git \
37+
python3-numpy \
38+
fontconfig \
39+
fonts-dejavu \
40+
fonts-dejavu-core \
41+
fonts-dejavu-extra \
42+
&& rm -rf /var/lib/apt/lists/*
43+
44+
# Install noVNC
45+
RUN git clone https://github.com/novnc/noVNC.git /opt/novnc \
46+
&& git clone https://github.com/novnc/websockify /opt/novnc/utils/websockify \
47+
&& ln -s /opt/novnc/vnc.html /opt/novnc/index.html
48+
49+
# Set platform explicitly for ARM64
50+
ARG TARGETPLATFORM=linux/arm64
51+
52+
# Set up working directory
53+
WORKDIR /app
54+
55+
# Copy requirements and install Python dependencies
56+
COPY requirements.txt .
57+
RUN pip install --no-cache-dir -r requirements.txt
58+
59+
# Install Playwright and browsers with system dependencies optimized for ARM64
60+
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
61+
RUN PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1 pip install playwright && \
62+
playwright install --with-deps chromium
63+
64+
# Copy the application code
65+
COPY . .
66+
67+
# Set environment variables
68+
ENV PYTHONUNBUFFERED=1
69+
ENV BROWSER_USE_LOGGING_LEVEL=info
70+
ENV CHROME_PATH=/ms-playwright/chromium-*/chrome-linux/chrome
71+
ENV ANONYMIZED_TELEMETRY=false
72+
ENV DISPLAY=:99
73+
ENV RESOLUTION=1920x1080x24
74+
ENV VNC_PASSWORD=vncpassword
75+
ENV CHROME_PERSISTENT_SESSION=true
76+
ENV RESOLUTION_WIDTH=1920
77+
ENV RESOLUTION_HEIGHT=1080
78+
79+
# Set up supervisor configuration
80+
RUN mkdir -p /var/log/supervisor
81+
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
82+
83+
EXPOSE 7788 6080 5901
84+
85+
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]

README.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ docker compose up --build
117117
CHROME_PERSISTENT_SESSION=true docker compose up --build
118118
```
119119

120+
120121
4. Access the Application:
121122
- Web Interface: Open `http://localhost:7788` in your browser
122123
- VNC Viewer (for watching browser interactions): Open `http://localhost:6080/vnc.html`
@@ -183,7 +184,11 @@ CHROME_PERSISTENT_SESSION=true docker compose up --build
183184
VNC_PASSWORD=your_vnc_password # Optional, defaults to "vncpassword"
184185
```
185186

186-
2. **Browser Persistence Modes:**
187+
2. **Platform Support:**
188+
- Supports both AMD64 and ARM64 architectures
189+
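- For ARM64 systems (e.g., Apple Silicon Macs), select the ARM64 build by setting `DOCKERFILE=Dockerfile.arm64` and `TARGETPLATFORM=linux/arm64` when running `docker compose`; the defaults are `Dockerfile` and `linux/amd64`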
190+
191+
3. **Browser Persistence Modes:**
187192
- **Default Mode (CHROME_PERSISTENT_SESSION=false):**
188193
- Browser opens and closes with each AI task
189194
- Clean state for each interaction
@@ -195,12 +200,13 @@ CHROME_PERSISTENT_SESSION=true docker compose up --build
195200
- Allows viewing previous AI interactions
196201
- Set in `.env` file or via environment variable when starting container
197202

198-
3. **Viewing Browser Interactions:**
203+
4. **Viewing Browser Interactions:**
199204
- Access the noVNC viewer at `http://localhost:6080/vnc.html`
200205
- Enter the VNC password (default: "vncpassword" or what you set in VNC_PASSWORD)
206+
- Direct VNC access available on port 5901 (published as `5901:5901` in `docker-compose.yml`)
201207
- You can now see all browser interactions in real-time
202208

203-
4. **Container Management:**
209+
5. **Container Management:**
204210
```bash
205211
# Start with persistent browser
206212
CHROME_PERSISTENT_SESSION=true docker compose up -d

docker-compose.yml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@ services:
33
platform: linux/amd64
44
build:
55
context: .
6-
dockerfile: Dockerfile
6+
dockerfile: ${DOCKERFILE:-Dockerfile}
7+
args:
8+
TARGETPLATFORM: ${TARGETPLATFORM:-linux/amd64}
79
ports:
810
- "7788:7788" # Gradio default port
911
- "6080:6080" # noVNC web interface
10-
- "5900:5900" # VNC port
12+
- "5901:5901" # VNC port
1113
- "9222:9222" # Chrome remote debugging port
1214
environment:
1315
- OPENAI_ENDPOINT=${OPENAI_ENDPOINT:-https://api.openai.com/v1}
@@ -42,7 +44,7 @@ services:
4244
tmpfs:
4345
- /tmp
4446
healthcheck:
45-
test: ["CMD", "nc", "-z", "localhost", "5900"]
47+
test: ["CMD", "nc", "-z", "localhost", "5901"]
4648
interval: 10s
4749
timeout: 5s
4850
retries: 3

entrypoint.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/bin/bash
2+
3+
# Start supervisord in the foreground to properly manage child processes
4+
exec /usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf

src/utils/deep_research.py

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,23 @@ async def deep_research(task, llm, agent_state=None, **kwargs):
269269
logger.info("\nFinish Searching, Start Generating Report...")
270270

271271
# 5. Report Generation in Markdown (or JSON if you prefer)
272+
return await generate_final_report(task, history_infos, save_dir, llm)
273+
274+
except Exception as e:
275+
logger.error(f"Deep research Error: {e}")
276+
return await generate_final_report(task, history_infos, save_dir, llm, str(e))
277+
finally:
278+
if browser:
279+
await browser.close()
280+
if browser_context:
281+
await browser_context.close()
282+
logger.info("Browser closed.")
283+
284+
async def generate_final_report(task, history_infos, save_dir, llm, error_msg=None):
285+
"""Generate report from collected information with error handling"""
286+
try:
287+
logger.info("\nAttempting to generate final report from collected data...")
288+
272289
writer_system_prompt = """
273290
You are a **Deep Researcher** and a professional report writer tasked with creating polished, high-quality reports that fully meet the user's needs, based on the user's instructions and the relevant information provided. You will write the report using Markdown format, ensuring it is both informative and visually appealing.
274291
@@ -314,21 +331,21 @@ async def deep_research(task, llm, agent_state=None, **kwargs):
314331
logger.info(ai_report_msg.reasoning_content)
315332
logger.info("🤯 End Report Deep Thinking")
316333
report_content = ai_report_msg.content
317-
# Remove ```markdown or ``` at the *very beginning* and ``` at the *very end*, with optional whitespace
318334
report_content = re.sub(r"^```\s*markdown\s*|^\s*```|```\s*$", "", report_content, flags=re.MULTILINE)
319335
report_content = report_content.strip()
336+
337+
# Add error notification to the report
338+
if error_msg:
339+
report_content = f"## ⚠️ Research Incomplete - Partial Results\n" \
340+
f"**The research process was interrupted by an error:** {error_msg}\n\n" \
341+
f"{report_content}"
342+
320343
report_file_path = os.path.join(save_dir, "final_report.md")
321344
with open(report_file_path, "w", encoding="utf-8") as f:
322345
f.write(report_content)
323346
logger.info(f"Save Report at: {report_file_path}")
324347
return report_content, report_file_path
325348

326-
except Exception as e:
327-
logger.error(f"Deep research Error: {e}")
328-
return "", None
329-
finally:
330-
if browser:
331-
await browser.close()
332-
if browser_context:
333-
await browser_context.close()
334-
logger.info("Browser closed.")
349+
except Exception as report_error:
350+
logger.error(f"Failed to generate partial report: {report_error}")
351+
return f"Error generating report: {str(report_error)}", None

src/utils/utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,10 +129,11 @@ def get_llm_model(provider: str, **kwargs):
129129
base_url = os.getenv("AZURE_OPENAI_ENDPOINT", "")
130130
else:
131131
base_url = kwargs.get("base_url")
132+
api_version = kwargs.get("api_version", "") or os.getenv("AZURE_OPENAI_API_VERSION", "2025-01-01-preview")
132133
return AzureChatOpenAI(
133134
model=kwargs.get("model_name", "gpt-4o"),
134135
temperature=kwargs.get("temperature", 0.0),
135-
api_version="2024-05-01-preview",
136+
api_version=api_version,
136137
azure_endpoint=base_url,
137138
api_key=api_key,
138139
)

supervisord.conf

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
[supervisord]
2+
user=root
23
nodaemon=true
34
logfile=/dev/stdout
45
logfile_maxbytes=0
@@ -13,6 +14,8 @@ stderr_logfile=/dev/stderr
1314
stderr_logfile_maxbytes=0
1415
priority=100
1516
startsecs=3
17+
stopsignal=TERM
18+
stopwaitsecs=10
1619

1720
[program:vnc_setup]
1821
command=bash -c "mkdir -p ~/.vnc && echo '%(ENV_VNC_PASSWORD)s' | vncpasswd -f > ~/.vnc/passwd && chmod 600 ~/.vnc/passwd && ls -la ~/.vnc/passwd"
@@ -25,28 +28,33 @@ stderr_logfile=/dev/stderr
2528
stderr_logfile_maxbytes=0
2629

2730
[program:x11vnc]
28-
command=bash -c "sleep 3 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5900 -bg -o /var/log/x11vnc.log"
31+
command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && chmod 666 /var/log/x11vnc.log && sleep 5 && DISPLAY=:99 x11vnc -display :99 -forever -shared -rfbauth /root/.vnc/passwd -rfbport 5901 -o /var/log/x11vnc.log"
2932
autorestart=true
3033
stdout_logfile=/dev/stdout
3134
stdout_logfile_maxbytes=0
3235
stderr_logfile=/dev/stderr
3336
stderr_logfile_maxbytes=0
3437
priority=200
35-
startretries=5
36-
startsecs=5
37-
depends_on=vnc_setup
38+
startretries=10
39+
startsecs=10
40+
stopsignal=TERM
41+
stopwaitsecs=10
42+
depends_on=vnc_setup,xvfb
3843

3944
[program:x11vnc_log]
40-
command=tail -f /var/log/x11vnc.log
45+
command=bash -c "mkdir -p /var/log && touch /var/log/x11vnc.log && tail -f /var/log/x11vnc.log"
4146
autorestart=true
4247
stdout_logfile=/dev/stdout
4348
stdout_logfile_maxbytes=0
4449
stderr_logfile=/dev/stderr
4550
stderr_logfile_maxbytes=0
4651
priority=250
52+
stopsignal=TERM
53+
stopwaitsecs=5
54+
depends_on=x11vnc
4755

4856
[program:novnc]
49-
command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5900 --listen 0.0.0.0:6080 --web /opt/novnc"
57+
command=bash -c "sleep 5 && cd /opt/novnc && ./utils/novnc_proxy --vnc localhost:5901 --listen 0.0.0.0:6080 --web /opt/novnc"
5058
autorestart=true
5159
stdout_logfile=/dev/stdout
5260
stdout_logfile_maxbytes=0
@@ -58,15 +66,18 @@ startsecs=3
5866
depends_on=x11vnc
5967

6068
[program:persistent_browser]
61-
command=bash -c 'mkdir -p /app/data/chrome_data && sleep 8 && google-chrome --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 "data:text/html,<html><body style=\"background: \#f0f0f0; margin: 0; display: flex; justify-content: center; align-items: center; height: 100vh; font-family: Arial;\"><h1>Browser Ready for AI Interaction</h1></body></html>"'
69+
environment=START_URL="data:text/html,<html><body><h1>Browser Ready</h1></body></html>"
70+
command=bash -c "mkdir -p /app/data/chrome_data && sleep 8 && $(find /ms-playwright/chromium-*/chrome-linux -name chrome) --user-data-dir=/app/data/chrome_data --window-position=0,0 --window-size=%(ENV_RESOLUTION_WIDTH)s,%(ENV_RESOLUTION_HEIGHT)s --start-maximized --no-sandbox --disable-dev-shm-usage --disable-gpu --disable-software-rasterizer --disable-setuid-sandbox --no-first-run --no-default-browser-check --no-experiments --ignore-certificate-errors --remote-debugging-port=9222 --remote-debugging-address=0.0.0.0 \"$START_URL\""
6271
autorestart=true
6372
stdout_logfile=/dev/stdout
6473
stdout_logfile_maxbytes=0
6574
stderr_logfile=/dev/stderr
6675
stderr_logfile_maxbytes=0
6776
priority=350
68-
startretries=3
69-
startsecs=3
77+
startretries=5
78+
startsecs=10
79+
stopsignal=TERM
80+
stopwaitsecs=15
7081
depends_on=novnc
7182

7283
[program:webui]
@@ -80,4 +91,6 @@ stderr_logfile_maxbytes=0
8091
priority=400
8192
startretries=3
8293
startsecs=3
83-
depends_on=persistent_browser
94+
stopsignal=TERM
95+
stopwaitsecs=10
96+
depends_on=persistent_browser

0 commit comments

Comments
 (0)