Skip to content

Commit 47b5b55

Browse files
committed
opt browser
1 parent 3f4a7d9 commit 47b5b55

File tree

6 files changed

+234
-67
lines changed

6 files changed

+234
-67
lines changed
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
from __future__ import annotations
2+
3+
import asyncio
4+
import gc
5+
import inspect
6+
import json
7+
import logging
8+
import os
9+
import re
10+
import time
11+
from pathlib import Path
12+
from typing import Any, Awaitable, Callable, Dict, Generic, List, Optional, TypeVar, Union
13+
14+
from dotenv import load_dotenv
15+
from langchain_core.language_models.chat_models import BaseChatModel
16+
from langchain_core.messages import (
17+
BaseMessage,
18+
HumanMessage,
19+
SystemMessage,
20+
)
21+
22+
# from lmnr.sdk.decorators import observe
23+
from pydantic import BaseModel, ValidationError
24+
25+
from browser_use.agent.gif import create_history_gif
26+
from browser_use.agent.memory.service import Memory, MemorySettings
27+
from browser_use.agent.message_manager.service import MessageManager, MessageManagerSettings
28+
from browser_use.agent.message_manager.utils import convert_input_messages, extract_json_from_model_output, save_conversation
29+
from browser_use.agent.prompts import AgentMessagePrompt, PlannerPrompt, SystemPrompt
30+
from browser_use.agent.views import (
31+
REQUIRED_LLM_API_ENV_VARS,
32+
ActionResult,
33+
AgentError,
34+
AgentHistory,
35+
AgentHistoryList,
36+
AgentOutput,
37+
AgentSettings,
38+
AgentState,
39+
AgentStepInfo,
40+
StepMetadata,
41+
ToolCallingMethod,
42+
)
43+
from browser_use.browser.browser import Browser
44+
from browser_use.browser.context import BrowserContext
45+
from browser_use.browser.views import BrowserState, BrowserStateHistory
46+
from browser_use.controller.registry.views import ActionModel
47+
from browser_use.controller.service import Controller
48+
from browser_use.dom.history_tree_processor.service import (
49+
DOMHistoryElement,
50+
HistoryTreeProcessor,
51+
)
52+
from browser_use.exceptions import LLMException
53+
from browser_use.telemetry.service import ProductTelemetry
54+
from browser_use.telemetry.views import (
55+
AgentEndTelemetryEvent,
56+
AgentRunTelemetryEvent,
57+
AgentStepTelemetryEvent,
58+
)
59+
from browser_use.utils import check_env_variables, time_execution_async, time_execution_sync
60+
from browser_use.agent.service import Agent, AgentHookFunc
61+
62+
load_dotenv()
63+
logger = logging.getLogger(__name__)
64+
65+
# Truthy when the variable's first character is t, y or 1 (e.g. "true", "Yes", "1").
# Slicing with [:1] instead of indexing with [0] keeps a set-but-empty value from
# raising IndexError at import time; the tuple (rather than the string 'ty1') is needed
# because the empty string is a substring of every string.
SKIP_LLM_API_KEY_VERIFICATION = os.environ.get('SKIP_LLM_API_KEY_VERIFICATION', 'false').lower()[:1] in ('t', 'y', '1')
66+
67+
68+
class BrowserUseAgent(Agent):
    """``Agent`` subclass that overrides ``run`` with a pause/stop-aware step loop."""

    @time_execution_async('--run (agent)')
    async def run(
        self,
        max_steps: int = 100,
        on_step_start: AgentHookFunc | None = None,
        on_step_end: AgentHookFunc | None = None,
    ) -> AgentHistoryList:
        """Execute the task with maximum number of steps.

        Args:
            max_steps: Upper bound on the number of calls to ``self.step``.
            on_step_start: Optional hook awaited with this agent before each step.
            on_step_end: Optional hook awaited with this agent after each step.

        Returns:
            ``self.state.history``, whether the loop finished, was stopped, hit the
            failure limit, or was interrupted by ``KeyboardInterrupt``.
        """
        # We are inside a coroutine, so a loop is guaranteed to be running; ask for it
        # directly instead of going through the policy-based get_event_loop().
        loop = asyncio.get_running_loop()

        # Set up the Ctrl+C signal handler with callbacks specific to this agent
        from browser_use.utils import SignalHandler

        signal_handler = SignalHandler(
            loop=loop,
            pause_callback=self.pause,
            resume_callback=self.resume,
            custom_exit_callback=None,  # No special cleanup needed on forced exit
            exit_on_second_int=True,
        )
        signal_handler.register()

        # Wait for verification task to complete if it exists
        if hasattr(self, '_verification_task') and not self._verification_task.done():
            try:
                await self._verification_task
            except Exception:
                # Error already logged in the task
                pass

        try:
            self._log_agent_run()

            # Execute initial actions if provided
            if self.initial_actions:
                result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
                self.state.last_result = result

            for step in range(max_steps):
                # Hold here while paused (e.g. after Ctrl+C). A stop request ends the
                # wait immediately; the stop check below then leaves the step loop, so
                # no step is executed once a stop has been requested.
                while self.state.paused and not self.state.stopped:
                    await asyncio.sleep(0.2)  # Small delay to prevent CPU spinning

                # Check if we should stop due to too many failures
                if self.state.consecutive_failures >= self.settings.max_failures:
                    logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
                    break

                # Check control flags before each step
                if self.state.stopped:
                    logger.info('Agent stopped')
                    break

                if on_step_start is not None:
                    await on_step_start(self)

                step_info = AgentStepInfo(step_number=step, max_steps=max_steps)
                await self.step(step_info)

                if on_step_end is not None:
                    await on_step_end(self)

                if self.state.history.is_done():
                    # Validation is skipped on the final allowed step: there would be
                    # no step left in which to act on a failed validation.
                    if self.settings.validate_output and step < max_steps - 1:
                        if not await self._validate_output():
                            continue

                    await self.log_completion()
                    break
            else:
                # for/else: only reached when the loop ran out of steps without a break.
                logger.info('❌ Failed to complete task in maximum steps')

            return self.state.history

        except KeyboardInterrupt:
            # Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
            logger.info('Got KeyboardInterrupt during execution, returning current history')
            return self.state.history

        finally:
            # Runs on every exit path (normal return, KeyboardInterrupt, other errors).
            # Unregister signal handlers before cleanup
            signal_handler.unregister()

            self.telemetry.capture(
                AgentEndTelemetryEvent(
                    agent_id=self.state.agent_id,
                    is_done=self.state.history.is_done(),
                    success=self.state.history.is_successful(),
                    steps=self.state.n_steps,
                    max_steps_reached=self.state.n_steps >= max_steps,
                    errors=self.state.history.errors(),
                    total_input_tokens=self.state.history.total_input_tokens(),
                    total_duration_seconds=self.state.history.total_duration_seconds(),
                )
            )

            await self.close()

            if self.settings.generate_gif:
                # A string setting is used as the output path; any other truthy value
                # falls back to the default file name.
                output_path: str = 'agent_history.gif'
                if isinstance(self.settings.generate_gif, str):
                    output_path = self.settings.generate_gif

                create_history_gif(task=self.task, history=self.state.history, output_path=output_path)

src/browser/custom_browser.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,29 +15,30 @@
1515
import logging
1616

1717
from browser_use.browser.chrome import (
18-
CHROME_ARGS,
19-
CHROME_DETERMINISTIC_RENDERING_ARGS,
20-
CHROME_DISABLE_SECURITY_ARGS,
21-
CHROME_DOCKER_ARGS,
22-
CHROME_HEADLESS_ARGS,
18+
CHROME_ARGS,
19+
CHROME_DETERMINISTIC_RENDERING_ARGS,
20+
CHROME_DISABLE_SECURITY_ARGS,
21+
CHROME_DOCKER_ARGS,
22+
CHROME_HEADLESS_ARGS,
2323
)
2424
from browser_use.browser.context import BrowserContext, BrowserContextConfig
2525
from browser_use.browser.utils.screen_resolution import get_screen_resolution, get_window_adjustments
2626
from browser_use.utils import time_execution_async
2727
import socket
2828

29-
from .custom_context import CustomBrowserContext
29+
from .custom_context import CustomBrowserContext, CustomBrowserContextConfig
3030

3131
logger = logging.getLogger(__name__)
3232

3333

3434
class CustomBrowser(Browser):
3535

36-
async def new_context(
37-
self,
38-
config: BrowserContextConfig = BrowserContextConfig()
39-
) -> CustomBrowserContext:
40-
return CustomBrowserContext(config=config, browser=self)
36+
async def new_context(self, config: CustomBrowserContextConfig | None = None) -> CustomBrowserContext:
    """Create a browser context.

    The dumped fields of this browser's own config form the base layer; the dumped
    fields of *config*, when given, override them. The merged mapping is used to
    build the ``CustomBrowserContextConfig`` handed to the new context.
    """
    settings = {}
    if self.config:
        settings.update(self.config.model_dump())
    if config:
        # Applied second so explicit context settings win over browser-level ones.
        settings.update(config.model_dump())
    context_config = CustomBrowserContextConfig(**settings)
    return CustomBrowserContext(config=context_config, browser=self)
4142

4243
async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrowser:
4344
"""Sets up and returns a Playwright Browser instance with anti-detection measures."""

src/browser/custom_context.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from browser_use.browser.context import BrowserContext, BrowserContextConfig
77
from playwright.async_api import Browser as PlaywrightBrowser
88
from playwright.async_api import BrowserContext as PlaywrightBrowserContext
9+
from typing import Optional
10+
from browser_use.browser.context import BrowserContextState
911

1012
logger = logging.getLogger(__name__)
1113

@@ -17,10 +19,11 @@ class CustomBrowserContextConfig(BrowserContextConfig):
1719
class CustomBrowserContext(BrowserContext):
1820
def __init__(
    self,
    browser: 'Browser',
    config: BrowserContextConfig | None = None,
    state: Optional[BrowserContextState] = None,
):
    """Initialise the context by delegating to the parent ``BrowserContext``.

    ``browser``, ``config`` and ``state`` are forwarded unchanged as keyword
    arguments; this class adds no initialisation of its own here.
    """
    super().__init__(
        browser=browser,
        config=config,
        state=state,
    )
2427

2528
async def _create_context(self, browser: PlaywrightBrowser):
2629
"""Creates a new browser context with anti-detection measures and loads cookies if available."""

src/webui/components/browser_settings_tab.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,28 @@
11
import gradio as gr
2+
import logging
23
from gradio.components import Component
34

45
from src.webui.webui_manager import WebuiManager
56
from src.utils import config
67

8+
logger = logging.getLogger(__name__)
79

8-
def create_browser_settings_tab(webui_manager: WebuiManager) -> dict[str, Component]:
10+
async def close_browser(webui_manager: WebuiManager):
    """
    Close browser

    Cancels the manager's current task if it is still running, then closes the
    browser context followed by the browser, clearing each reference on the
    manager once it has been closed.
    """
    current_task = webui_manager.bu_current_task
    if current_task and not current_task.done():
        current_task.cancel()
        webui_manager.bu_current_task = None

    # Close the context before the browser: contexts are created against a browser
    # instance, so tearing the browser down first would leave the context's own
    # close() running against an already-closed browser.
    if webui_manager.bu_browser_context:
        await webui_manager.bu_browser_context.close()
        webui_manager.bu_browser_context = None

    if webui_manager.bu_browser:
        await webui_manager.bu_browser.close()
        webui_manager.bu_browser = None
23+
24+
25+
def create_browser_settings_tab(webui_manager: WebuiManager):
926
"""
1027
Creates a browser settings tab.
1128
"""
@@ -125,3 +142,12 @@ def create_browser_settings_tab(webui_manager: WebuiManager) -> dict[str, Compon
125142
)
126143
)
127144
webui_manager.add_components("browser_settings", tab_components)
145+
146+
async def close_wrapper():
    """Wrapper for close_browser.

    Takes no arguments so it can be registered directly as a ``.change`` handler
    on the browser settings components; it closes the current browser through
    ``close_browser`` using the enclosing ``webui_manager``.
    """
    await close_browser(webui_manager)
149+
150+
headless.change(close_wrapper)
151+
keep_browser_open.change(close_wrapper)
152+
disable_security.change(close_wrapper)
153+
use_own_browser.change(close_wrapper)

0 commit comments

Comments
 (0)