3
3
# @Author : wenshao
4
4
# @ProjectName: browser-use-webui
5
5
# @FileName: test_browser_use.py
6
+ import pdb
6
7
7
8
from dotenv import load_dotenv
8
9
@@ -28,20 +29,29 @@ async def test_browser_use_org():
28
29
BrowserContextWindowSize ,
29
30
)
30
31
32
+ # llm = utils.get_llm_model(
33
+ # provider="azure_openai",
34
+ # model_name="gpt-4o",
35
+ # temperature=0.8,
36
+ # base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
37
+ # api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
38
+ # )
39
+
31
40
llm = utils .get_llm_model (
32
- provider = "azure_openai" ,
33
- model_name = "gpt-4o" ,
34
- temperature = 0.8 ,
35
- base_url = os .getenv ("AZURE_OPENAI_ENDPOINT" , "" ),
36
- api_key = os .getenv ("AZURE_OPENAI_API_KEY" , "" ),
41
+ provider = "deepseek" ,
42
+ model_name = "deepseek-chat" ,
43
+ temperature = 0.8
37
44
)
38
45
39
46
window_w , window_h = 1920 , 1080
47
+ use_vision = False
48
+ chrome_path = os .getenv ("CHROME_PATH" , None )
40
49
41
50
browser = Browser (
42
51
config = BrowserConfig (
43
52
headless = False ,
44
53
disable_security = True ,
54
+ chrome_instance_path = chrome_path ,
45
55
extra_chromium_args = [f"--window-size={ window_w } ,{ window_h } " ],
46
56
)
47
57
)
@@ -59,6 +69,7 @@ async def test_browser_use_org():
59
69
task = "go to google.com and type 'OpenAI' click search and give me the first url" ,
60
70
llm = llm ,
61
71
browser_context = browser_context ,
72
+ use_vision = use_vision
62
73
)
63
74
history : AgentHistoryList = await agent .run (max_steps = 10 )
64
75
@@ -208,6 +219,122 @@ async def test_browser_use_custom():
208
219
await browser .close ()
209
220
210
221
222
async def test_browser_use_custom_v2():
    """Run the custom agent end-to-end against a real browser and print its history.

    Builds a DeepSeek chat model through ``utils.get_llm_model``, opens a
    ``CustomBrowser`` (``headless=False``, 1920x1080 window) with a context
    that writes traces and recordings under ``./tmp``, and lets
    ``CustomAgent`` work on a Google search task for at most 10 steps. The
    final result, errors, model actions and model thoughts are then
    pretty-printed.

    Any exception raised while setting up or running the agent is printed
    with its traceback instead of being propagated. The browser context and
    the browser are closed afterwards in all cases.
    """
    from browser_use.browser.browser import BrowserConfig
    from browser_use.browser.context import BrowserContextWindowSize

    from src.agent.custom_agent import CustomAgent
    from src.agent.custom_prompts import CustomSystemPrompt
    from src.browser.custom_browser import CustomBrowser
    from src.browser.custom_context import BrowserContextConfig
    from src.controller.custom_controller import CustomController

    window_w, window_h = 1920, 1080

    # Alternative providers, kept for manual switching:
    #
    # llm = utils.get_llm_model(
    #     provider="azure_openai",
    #     model_name="gpt-4o",
    #     temperature=0.8,
    #     base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""),
    #     api_key=os.getenv("AZURE_OPENAI_API_KEY", ""),
    # )
    #
    # llm = utils.get_llm_model(
    #     provider="gemini",
    #     model_name="gemini-2.0-flash-exp",
    #     temperature=1.0,
    #     api_key=os.getenv("GOOGLE_API_KEY", ""),
    # )
    #
    # llm = utils.get_llm_model(
    #     provider="ollama", model_name="qwen2.5:7b", temperature=0.8
    # )
    llm = utils.get_llm_model(
        provider="deepseek",
        model_name="deepseek-chat",
        temperature=0.8,
    )

    controller = CustomController()
    use_own_browser = True
    disable_security = True
    use_vision = False  # Set to False when using DeepSeek
    tool_call_in_content = True  # Set to True when using Ollama
    max_actions_per_step = 1

    # Pre-bind both handles so the cleanup below can tell which resources
    # were actually created before a failure.
    browser = None
    browser_context = None

    try:
        # An unset or empty CHROME_PATH both mean "no custom Chrome binary".
        chrome_path = (os.getenv("CHROME_PATH") or None) if use_own_browser else None

        browser = CustomBrowser(
            config=BrowserConfig(
                headless=False,
                disable_security=disable_security,
                chrome_instance_path=chrome_path,
                # Chromium expects "--window-size=W,H" with no whitespace.
                extra_chromium_args=[f"--window-size={window_w},{window_h}"],
            )
        )
        browser_context = await browser.new_context(
            config=BrowserContextConfig(
                trace_path="./tmp/traces",
                save_recording_path="./tmp/record_videos",
                no_viewport=False,
                browser_window_size=BrowserContextWindowSize(
                    width=window_w, height=window_h
                ),
            )
        )
        agent = CustomAgent(
            task="go to google.com and type 'OpenAI' click search and give me the first url",
            add_infos="",  # some hints for llm to complete the task
            llm=llm,
            browser=browser,
            browser_context=browser_context,
            controller=controller,
            system_prompt_class=CustomSystemPrompt,
            use_vision=use_vision,
            tool_call_in_content=tool_call_in_content,
            max_actions_per_step=max_actions_per_step,
        )
        history: AgentHistoryList = await agent.run(max_steps=10)

        print("Final Result:")
        pprint(history.final_result(), indent=4)

        print("\n Errors:")
        pprint(history.errors(), indent=4)

        # e.g. xPaths the model clicked on
        print("\n Model Outputs:")
        pprint(history.model_actions(), indent=4)

        print("\n Thoughts:")
        pprint(history.model_thoughts(), indent=4)
    except Exception:
        import traceback

        # Report the failure but still fall through to the cleanup below.
        traceback.print_exc()
    finally:
        # Explicitly close the persistent context first, then the browser.
        # The nested try/finally guarantees the browser is closed even when
        # closing the context itself raises.
        try:
            if browser_context is not None:
                await browser_context.close()
        finally:
            if browser is not None:
                await browser.close()
336
+
211
337
if __name__ == "__main__":
    # Script entry point: exactly one scenario runs per invocation.
    # Uncomment a different line (and comment the active one) to switch.
    # asyncio.run(test_browser_use_org())
    # asyncio.run(test_browser_use_custom())
    asyncio.run(test_browser_use_custom_v2())
0 commit comments