36
36
37
37
from src .utils import utils
38
38
39
-
40
39
async def run_browser_agent (
41
40
agent_type ,
42
41
llm_provider ,
@@ -55,10 +54,14 @@ async def run_browser_agent(
55
54
max_steps ,
56
55
use_vision
57
56
):
58
- """
59
- Runs the browser agent based on user configurations.
60
- """
57
+ # Ensure the recording directory exists
58
+ os .makedirs (save_recording_path , exist_ok = True )
61
59
60
+ # Get the list of existing videos before the agent runs
61
+ existing_videos = set (glob .glob (os .path .join (save_recording_path , '*.[mM][pP]4' )) +
62
+ glob .glob (os .path .join (save_recording_path , '*.[wW][eE][bB][mM]' )))
63
+
64
+ # Run the agent
62
65
llm = utils .get_llm_model (
63
66
provider = llm_provider ,
64
67
model_name = llm_model_name ,
@@ -67,7 +70,7 @@ async def run_browser_agent(
67
70
api_key = llm_api_key
68
71
)
69
72
if agent_type == "org" :
70
- return await run_org_agent (
73
+ final_result , errors , model_actions , model_thoughts = await run_org_agent (
71
74
llm = llm ,
72
75
headless = headless ,
73
76
disable_security = disable_security ,
@@ -79,7 +82,7 @@ async def run_browser_agent(
79
82
use_vision = use_vision
80
83
)
81
84
elif agent_type == "custom" :
82
- return await run_custom_agent (
85
+ final_result , errors , model_actions , model_thoughts = await run_custom_agent (
83
86
llm = llm ,
84
87
use_own_browser = use_own_browser ,
85
88
headless = headless ,
@@ -95,6 +98,16 @@ async def run_browser_agent(
95
98
else :
96
99
raise ValueError (f"Invalid agent type: { agent_type } " )
97
100
101
+ # Get the list of videos after the agent runs
102
+ new_videos = set (glob .glob (os .path .join (save_recording_path , '*.[mM][pP]4' )) +
103
+ glob .glob (os .path .join (save_recording_path , '*.[wW][eE][bB][mM]' )))
104
+
105
+ # Find the newly created video
106
+ latest_video = None
107
+ if new_videos - existing_videos :
108
+ latest_video = list (new_videos - existing_videos )[0 ] # Get the first new video
109
+
110
+ return final_result , errors , model_actions , model_thoughts , latest_video
98
111
99
112
async def run_org_agent (
100
113
llm ,
@@ -137,7 +150,6 @@ async def run_org_agent(
137
150
await browser .close ()
138
151
return final_result , errors , model_actions , model_thoughts
139
152
140
-
141
153
async def run_custom_agent (
142
154
llm ,
143
155
use_own_browser ,
@@ -228,88 +240,228 @@ async def run_custom_agent(
228
240
return final_result , errors , model_actions , model_thoughts
229
241
230
242
231
- def main ():
232
- parser = argparse .ArgumentParser (description = "Gradio UI for Browser Agent" )
233
- parser .add_argument ("--ip" , type = str , default = "127.0.0.1" , help = "IP address to bind to" )
234
- parser .add_argument ("--port" , type = int , default = 7788 , help = "Port to listen on" )
235
- args = parser .parse_args ()
243
+ import argparse
244
+ import gradio as gr
245
+ from gradio .themes import Base , Default , Soft , Monochrome , Glass , Origin , Citrus , Ocean
246
+ import os , glob
236
247
237
- js_func = """
238
- function refresh() {
239
- const url = new URL(window.location);
248
+ # Define the theme map globally
249
+ theme_map = {
250
+ "Default" : Default (),
251
+ "Soft" : Soft (),
252
+ "Monochrome" : Monochrome (),
253
+ "Glass" : Glass (),
254
+ "Origin" : Origin (),
255
+ "Citrus" : Citrus (),
256
+ "Ocean" : Ocean ()
257
+ }
240
258
241
- if (url.searchParams.get('__theme') !== 'dark') {
242
- url.searchParams.set('__theme', 'dark');
243
- window.location.href = url.href;
244
- }
245
- }
246
- """
259
+ def create_ui (theme_name = "Ocean" ):
260
+ css = """
261
+ .gradio-container {
262
+ max-width: 1200px !important;
263
+ margin: auto !important;
264
+ padding-top: 20px !important;
265
+ }
266
+ .header-text {
267
+ text-align: center;
268
+ margin-bottom: 30px;
269
+ }
270
+ .theme-section {
271
+ margin-bottom: 20px;
272
+ padding: 15px;
273
+ border-radius: 10px;
274
+ }
275
+ """
247
276
248
- # Gradio UI setup
249
- with gr .Blocks (title = "Browser Use WebUI" , theme = gr .themes .Soft (font = [gr .themes .GoogleFont ("Plus Jakarta Sans" )]),
250
- js = js_func ) as demo :
251
- gr .Markdown ("<center><h1>Browser Use WebUI</h1></center>" )
252
- with gr .Row ():
253
- agent_type = gr .Radio (["org" , "custom" ], label = "Agent Type" , value = "custom" )
254
- max_steps = gr .Number (label = "max run steps" , value = 100 )
255
- use_vision = gr .Checkbox (label = "use vision" , value = True )
277
+ js = """
278
+ function refresh() {
279
+ const url = new URL(window.location);
280
+ if (url.searchParams.get('__theme') !== 'dark') {
281
+ url.searchParams.set('__theme', 'dark');
282
+ window.location.href = url.href;
283
+ }
284
+ }
285
+ """
286
+
287
+ with gr .Blocks (title = "Browser Use WebUI" , theme = theme_map [theme_name ], css = css , js = js ) as demo :
256
288
with gr .Row ():
257
- llm_provider = gr .Dropdown (
258
- ["anthropic" , "openai" , "gemini" , "azure_openai" , "deepseek" , "ollama" ], label = "LLM Provider" ,
259
- value = "gemini"
289
+ gr .Markdown (
290
+ """
291
+ # 🌐 Browser Use WebUI
292
+ ### Control your browser with AI assistance
293
+ """ ,
294
+ elem_classes = ["header-text" ]
260
295
)
261
- llm_model_name = gr .Textbox (label = "LLM Model Name" , value = "gemini-2.0-flash-exp" )
262
- llm_temperature = gr .Number (label = "LLM Temperature" , value = 1.0 )
263
- with gr .Row ():
264
- llm_base_url = gr .Textbox (label = "LLM Base URL" )
265
- llm_api_key = gr .Textbox (label = "LLM API Key" , type = "password" )
266
-
267
- with gr .Accordion ("Browser Settings" , open = False ):
268
- use_own_browser = gr .Checkbox (label = "Use Own Browser" , value = False )
269
- headless = gr .Checkbox (label = "Headless" , value = False )
270
- disable_security = gr .Checkbox (label = "Disable Security" , value = True )
271
- with gr .Row ():
272
- window_w = gr .Number (label = "Window Width" , value = 1920 )
273
- window_h = gr .Number (label = "Window Height" , value = 1080 )
274
- save_recording_path = gr .Textbox (label = "Save Recording Path" , placeholder = "e.g. ./tmp/record_videos" ,
275
- value = "./tmp/record_videos" )
276
- with gr .Accordion ("Task Settings" , open = True ):
277
- task = gr .Textbox (label = "Task" , lines = 10 ,
278
- value = "go to google.com and type 'OpenAI' click search and give me the first url" )
279
- add_infos = gr .Textbox (label = "Additional Infos(Optional): Hints to help LLM complete Task" , lines = 5 )
280
-
281
- run_button = gr .Button ("Run Agent" , variant = "primary" )
282
- with gr .Column ():
283
- final_result_output = gr .Textbox (label = "Final Result" , lines = 5 )
284
- errors_output = gr .Textbox (label = "Errors" , lines = 5 , )
285
- model_actions_output = gr .Textbox (label = "Model Actions" , lines = 5 )
286
- model_thoughts_output = gr .Textbox (label = "Model Thoughts" , lines = 5 )
296
+
297
+ with gr .Tabs () as tabs :
298
+ with gr .TabItem ("🤖 Agent Settings" , id = 1 ):
299
+ with gr .Group ():
300
+ agent_type = gr .Radio (
301
+ ["org" , "custom" ],
302
+ label = "Agent Type" ,
303
+ value = "custom" ,
304
+ info = "Select the type of agent to use"
305
+ )
306
+ max_steps = gr .Slider (
307
+ minimum = 1 ,
308
+ maximum = 200 ,
309
+ value = 100 ,
310
+ step = 1 ,
311
+ label = "Max Run Steps" ,
312
+ info = "Maximum number of steps the agent will take"
313
+ )
314
+ use_vision = gr .Checkbox (
315
+ label = "Use Vision" ,
316
+ value = True ,
317
+ info = "Enable visual processing capabilities"
318
+ )
319
+
320
+ with gr .TabItem ("🔧 LLM Configuration" , id = 2 ):
321
+ with gr .Group ():
322
+ llm_provider = gr .Dropdown (
323
+ ["anthropic" , "openai" , "gemini" , "azure_openai" , "deepseek" , "ollama" ],
324
+ label = "LLM Provider" ,
325
+ value = "gemini" ,
326
+ info = "Select your preferred language model provider"
327
+ )
328
+ llm_model_name = gr .Textbox (
329
+ label = "Model Name" ,
330
+ value = "gemini-2.0-flash-exp" ,
331
+ info = "Specify the model to use"
332
+ )
333
+ llm_temperature = gr .Slider (
334
+ minimum = 0.0 ,
335
+ maximum = 2.0 ,
336
+ value = 1.0 ,
337
+ step = 0.1 ,
338
+ label = "Temperature" ,
339
+ info = "Controls randomness in model outputs"
340
+ )
341
+ with gr .Row ():
342
+ llm_base_url = gr .Textbox (
343
+ label = "Base URL" ,
344
+ info = "API endpoint URL (if required)"
345
+ )
346
+ llm_api_key = gr .Textbox (
347
+ label = "API Key" ,
348
+ type = "password" ,
349
+ info = "Your API key"
350
+ )
351
+
352
+ with gr .TabItem ("🌐 Browser Settings" , id = 3 ):
353
+ with gr .Group ():
354
+ with gr .Row ():
355
+ use_own_browser = gr .Checkbox (
356
+ label = "Use Own Browser" ,
357
+ value = False ,
358
+ info = "Use your existing browser instance"
359
+ )
360
+ headless = gr .Checkbox (
361
+ label = "Headless Mode" ,
362
+ value = False ,
363
+ info = "Run browser without GUI"
364
+ )
365
+ disable_security = gr .Checkbox (
366
+ label = "Disable Security" ,
367
+ value = True ,
368
+ info = "Disable browser security features"
369
+ )
370
+
371
+ with gr .Row ():
372
+ window_w = gr .Number (
373
+ label = "Window Width" ,
374
+ value = 1920 ,
375
+ info = "Browser window width"
376
+ )
377
+ window_h = gr .Number (
378
+ label = "Window Height" ,
379
+ value = 1080 ,
380
+ info = "Browser window height"
381
+ )
382
+
383
+ save_recording_path = gr .Textbox (
384
+ label = "Recording Path" ,
385
+ placeholder = "e.g. ./tmp/record_videos" ,
386
+ value = "./tmp/record_videos" ,
387
+ info = "Path to save browser recordings"
388
+ )
389
+
390
+ with gr .TabItem ("📝 Task Settings" , id = 4 ):
391
+ task = gr .Textbox (
392
+ label = "Task Description" ,
393
+ lines = 4 ,
394
+ placeholder = "Enter your task here..." ,
395
+ value = "go to google.com and type 'OpenAI' click search and give me the first url" ,
396
+ info = "Describe what you want the agent to do"
397
+ )
398
+ add_infos = gr .Textbox (
399
+ label = "Additional Information" ,
400
+ lines = 3 ,
401
+ placeholder = "Add any helpful context or instructions..." ,
402
+ info = "Optional hints to help the LLM complete the task"
403
+ )
404
+
405
+ with gr .Row ():
406
+ run_button = gr .Button ("▶️ Run Agent" , variant = "primary" , scale = 2 )
407
+ stop_button = gr .Button ("⏹️ Stop" , variant = "stop" , scale = 1 )
408
+
409
+ with gr .TabItem ("🎬 Recordings" , id = 5 ):
410
+ recording_display = gr .Video (label = "Latest Recording" )
411
+
412
+ with gr .Group ():
413
+ gr .Markdown ("### Results" )
414
+ with gr .Row ():
415
+ with gr .Column ():
416
+ final_result_output = gr .Textbox (
417
+ label = "Final Result" ,
418
+ lines = 3 ,
419
+ show_label = True
420
+ )
421
+ with gr .Column ():
422
+ errors_output = gr .Textbox (
423
+ label = "Errors" ,
424
+ lines = 3 ,
425
+ show_label = True
426
+ )
427
+ with gr .Row ():
428
+ with gr .Column ():
429
+ model_actions_output = gr .Textbox (
430
+ label = "Model Actions" ,
431
+ lines = 3 ,
432
+ show_label = True
433
+ )
434
+ with gr .Column ():
435
+ model_thoughts_output = gr .Textbox (
436
+ label = "Model Thoughts" ,
437
+ lines = 3 ,
438
+ show_label = True
439
+ )
287
440
441
+ # Run button click handler
288
442
run_button .click (
289
443
fn = run_browser_agent ,
290
444
inputs = [
291
- agent_type ,
292
- llm_provider ,
293
- llm_model_name ,
294
- llm_temperature ,
295
- llm_base_url ,
296
- llm_api_key ,
297
- use_own_browser ,
298
- headless ,
299
- disable_security ,
300
- window_w ,
301
- window_h ,
302
- save_recording_path ,
303
- task ,
304
- add_infos ,
305
- max_steps ,
306
- use_vision
445
+ agent_type , llm_provider , llm_model_name , llm_temperature ,
446
+ llm_base_url , llm_api_key , use_own_browser , headless ,
447
+ disable_security , window_w , window_h , save_recording_path ,
448
+ task , add_infos , max_steps , use_vision
307
449
],
308
- outputs = [final_result_output , errors_output , model_actions_output , model_thoughts_output ],
450
+ outputs = [final_result_output , errors_output , model_actions_output , model_thoughts_output , recording_display ]
309
451
)
310
452
311
- demo .launch (server_name = args .ip , server_port = args .port )
453
+ return demo
454
+
455
+ def main ():
456
+ parser = argparse .ArgumentParser (description = "Gradio UI for Browser Agent" )
457
+ parser .add_argument ("--ip" , type = str , default = "127.0.0.1" , help = "IP address to bind to" )
458
+ parser .add_argument ("--port" , type = int , default = 7788 , help = "Port to listen on" )
459
+ parser .add_argument ("--theme" , type = str , default = "Ocean" , choices = theme_map .keys (), help = "Theme to use for the UI" )
460
+ parser .add_argument ("--dark-mode" , action = "store_true" , help = "Enable dark mode" )
461
+ args = parser .parse_args ()
312
462
463
+ demo = create_ui (theme_name = args .theme )
464
+ demo .launch (server_name = args .ip , server_port = args .port )
313
465
314
466
if __name__ == '__main__' :
315
467
main ()
0 commit comments