Skip to content

Commit 2024ad3

Browse files
committed
Support connecting to the user's own browser
1 parent 863e865 commit 2024ad3

File tree

5 files changed

+136
-40
lines changed

5 files changed

+136
-40
lines changed

src/agent/custom_prompts.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ def important_rules(self) -> str:
2626
{
2727
"current_state": {
2828
"prev_action_evaluation": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Ignore the action result. The website is the ground truth. Also mention if something unexpected happened like new suggestions in an input field. Shortly state why/why not. Note that the result you output must be consistent with the reasoning you output afterwards. If you consider it to be 'Failed,' you should reflect on this during your thought.",
29-
"import_contents": "Please think about whether there is any content closely related to user\'s instruction or task on the current page? If there is, please output the content. If not, please output \"None\".",
30-
"completed_contents": "Update the task progress. Don\'t output the purpose of any operation. Completed contents is a general summary of the current contents that have been completed. Just summarize the contents that have been actually completed based on the current and the history operations. Please list each completed item individually, such as: 1. Input username. 2. Input Password. 3. Click confirm button",
29+
"import_contents": "Please think about whether there is any content closely related to user\'s instruction on the current page? If there is, please output the content. If not, please output \"None\".",
30+
"completed_contents": "Update the input Task Progress. Completed contents is a general summary of the current contents that have been completed. Just summarize the contents that have been actually completed based on the current page and the history operations. Please list each completed item individually, such as: 1. Input username. 2. Input Password. 3. Click confirm button",
3131
"thought": "Think about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation. If the output of prev_action_evaluation is 'Failed', please reflect and output your reflection here. If you think you have entered the wrong page, consider to go back to the previous page in next action.",
32-
"summary": "Please generate a brief natural language description for the operation in next Actions based on your Thought."
32+
"summary": "Please generate a brief natural language description for the operation in next actions based on your Thought."
3333
},
3434
"action": [
3535
{

src/browser/custom_browser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
class CustomBrowser(Browser):
1414

1515
async def new_context(
16-
self, config: BrowserContextConfig = BrowserContextConfig()
16+
self, config: BrowserContextConfig = BrowserContextConfig(), context: CustomBrowserContext = None
1717
) -> BrowserContext:
1818
"""Create a browser context"""
19-
return CustomBrowserContext(config=config, browser=self)
19+
return CustomBrowserContext(config=config, browser=self, context=context)

src/browser/custom_context.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,19 @@
2020

2121
class CustomBrowserContext(BrowserContext):
2222

23+
def __init__(
24+
self,
25+
browser: 'Browser',
26+
config: BrowserContextConfig = BrowserContextConfig(),
27+
context: BrowserContext = None
28+
):
29+
super(CustomBrowserContext, self).__init__(browser, config)
30+
self.context = context
31+
2332
async def _create_context(self, browser: PlaywrightBrowser):
2433
"""Creates a new browser context with anti-detection measures and loads cookies if available."""
34+
if self.context:
35+
return self.context
2536
if self.browser.config.chrome_instance_path and len(browser.contexts) > 0:
2637
# Connect to existing Chrome instance instead of creating new one
2738
context = browser.contexts[0]

tests/test_browser_use.py

Lines changed: 83 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,14 @@
33
# @Author : wenshao
44
# @ProjectName: browser-use-webui
55
# @FileName: test_browser_use.py
6+
import pdb
7+
68
from dotenv import load_dotenv
79

810
load_dotenv()
11+
import sys
912

13+
sys.path.append(".")
1014
import os
1115
import sys
1216
from pprint import pprint
@@ -74,13 +78,15 @@ async def test_browser_use_org():
7478

7579

7680
async def test_browser_use_custom():
81+
from playwright.async_api import async_playwright
7782
from browser_use.browser.context import BrowserContextWindowSize
7883

7984
from src.browser.custom_browser import CustomBrowser, BrowserConfig
8085
from src.browser.custom_context import BrowserContext, BrowserContextConfig
8186
from src.controller.custom_controller import CustomController
8287
from src.agent.custom_agent import CustomAgent
8388
from src.agent.custom_prompts import CustomSystemPrompt
89+
from src.browser.custom_context import CustomBrowserContext
8490

8591
window_w, window_h = 1920, 1080
8692

@@ -95,49 +101,91 @@ async def test_browser_use_custom():
95101
llm = utils.get_llm_model(
96102
provider="gemini",
97103
model_name="gemini-2.0-flash-exp",
98-
temperature=0.8,
104+
temperature=1.0,
99105
api_key=os.getenv("GOOGLE_API_KEY", "")
100106
)
101107

102-
browser = CustomBrowser(
103-
config=BrowserConfig(
104-
headless=False,
105-
disable_security=True,
106-
extra_chromium_args=[f'--window-size={window_w},{window_h}'],
107-
)
108-
)
109108
controller = CustomController()
110-
async with await browser.new_context(
111-
config=BrowserContextConfig(
112-
trace_path='./tmp/traces',
113-
save_recording_path="./tmp/record_videos",
109+
use_own_browser = True
110+
disable_security = True
111+
playwright = None
112+
browser_context_ = None
113+
try:
114+
if use_own_browser:
115+
playwright = await async_playwright().start()
116+
chrome_exe = os.getenv("CHROME_PATH", "")
117+
chrome_use_data = os.getenv("CHROME_USER_DATA", "")
118+
browser_context_ = await playwright.chromium.launch_persistent_context(
119+
user_data_dir=chrome_use_data,
120+
executable_path=chrome_exe,
114121
no_viewport=False,
115-
browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
122+
headless=False, # keep the browser window visible
123+
user_agent=(
124+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
125+
'(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
126+
),
127+
java_script_enabled=True,
128+
bypass_csp=disable_security,
129+
ignore_https_errors=disable_security,
130+
record_video_dir="./tmp/record_videos",
131+
record_video_size={'width': window_w, 'height': window_h}
132+
)
133+
else:
134+
browser_context_ = None
135+
136+
browser = CustomBrowser(
137+
config=BrowserConfig(
138+
headless=False,
139+
disable_security=True,
140+
extra_chromium_args=[f'--window-size={window_w},{window_h}'],
116141
)
117-
) as browser_context:
118-
agent = Agent(
119-
task="go to google.com and type 'OpenAI' click search and give me the first url",
120-
llm=llm,
121-
browser_context=browser_context,
122-
controller=controller,
123-
system_prompt_class=CustomSystemPrompt
124142
)
125-
history: AgentHistoryList = await agent.run(max_steps=10)
126-
127-
print('Final Result:')
128-
pprint(history.final_result(), indent=4)
129-
130-
print('\nErrors:')
131-
pprint(history.errors(), indent=4)
132143

133-
# e.g. xPaths the model clicked on
134-
print('\nModel Outputs:')
135-
pprint(history.model_actions(), indent=4)
136-
137-
print('\nThoughts:')
138-
pprint(history.model_thoughts(), indent=4)
139-
# close browser
140-
await browser.close()
144+
async with await browser.new_context(
145+
config=BrowserContextConfig(
146+
trace_path='./tmp/result_processing',
147+
save_recording_path="./tmp/record_videos",
148+
no_viewport=False,
149+
browser_window_size=BrowserContextWindowSize(width=window_w, height=window_h),
150+
),
151+
context=browser_context_
152+
) as browser_context:
153+
agent = CustomAgent(
154+
task="go to google.com and type 'OpenAI' click search and give me the first url",
155+
add_infos="", # some hints for llm to complete the task
156+
llm=llm,
157+
browser_context=browser_context,
158+
controller=controller,
159+
system_prompt_class=CustomSystemPrompt
160+
)
161+
history: AgentHistoryList = await agent.run(max_steps=10)
162+
163+
print('Final Result:')
164+
pprint(history.final_result(), indent=4)
165+
166+
print('\nErrors:')
167+
pprint(history.errors(), indent=4)
168+
169+
# e.g. xPaths the model clicked on
170+
print('\nModel Outputs:')
171+
pprint(history.model_actions(), indent=4)
172+
173+
print('\nThoughts:')
174+
pprint(history.model_thoughts(), indent=4)
175+
# close browser
176+
except Exception as e:
177+
import traceback
178+
traceback.print_exc()
179+
finally:
180+
# Explicitly close the persistent context
181+
if browser_context_:
182+
await browser_context_.close()
183+
184+
# Shut down the Playwright object
185+
if playwright:
186+
await playwright.stop()
187+
188+
await browser.close()
141189

142190

143191
if __name__ == '__main__':

tests/test_playwright.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# -*- coding: utf-8 -*-
2+
# @Time : 2025/1/2
3+
# @Author : wenshao
4+
5+
# @Project : browser-use-webui
6+
# @FileName: test_playwright.py
7+
import pdb
8+
from dotenv import load_dotenv
9+
10+
load_dotenv()
11+
12+
13+
def test_connect_browser():
14+
import os
15+
from playwright.sync_api import sync_playwright
16+
17+
chrome_exe = os.getenv("CHROME_PATH", "")
18+
chrome_use_data = os.getenv("CHROME_USER_DATA", "")
19+
20+
with sync_playwright() as p:
21+
browser = p.chromium.launch_persistent_context(
22+
user_data_dir=chrome_use_data,
23+
executable_path=chrome_exe,
24+
headless=False # keep the browser window visible
25+
)
26+
27+
page = browser.new_page()
28+
page.goto("https://mail.google.com/mail/u/0/#inbox")
29+
page.wait_for_load_state()
30+
31+
input("按下回车键以关闭浏览器...")
32+
33+
browser.close()
34+
35+
36+
if __name__ == '__main__':
37+
test_connect_browser()

0 commit comments

Comments
 (0)