Skip to content

Commit 00a0fac

Browse files
authored
Merge pull request #11 from richard-devbot/main
Upgraded Gradio UI
2 parents e481813 + d0a3804 commit 00a0fac

File tree

2 files changed: +230 additions, -77 deletions

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ browser-use
 langchain-google-genai
 pyperclip
 gradio
-langchain-ollama
+langchain-ollama
+

webui.py

Lines changed: 228 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636

3737
from src.utils import utils
3838

39-
4039
async def run_browser_agent(
4140
agent_type,
4241
llm_provider,
@@ -55,10 +54,14 @@ async def run_browser_agent(
5554
max_steps,
5655
use_vision
5756
):
58-
"""
59-
Runs the browser agent based on user configurations.
60-
"""
57+
# Ensure the recording directory exists
58+
os.makedirs(save_recording_path, exist_ok=True)
6159

60+
# Get the list of existing videos before the agent runs
61+
existing_videos = set(glob.glob(os.path.join(save_recording_path, '*.[mM][pP]4')) +
62+
glob.glob(os.path.join(save_recording_path, '*.[wW][eE][bB][mM]')))
63+
64+
# Run the agent
6265
llm = utils.get_llm_model(
6366
provider=llm_provider,
6467
model_name=llm_model_name,
@@ -67,7 +70,7 @@ async def run_browser_agent(
6770
api_key=llm_api_key
6871
)
6972
if agent_type == "org":
70-
return await run_org_agent(
73+
final_result, errors, model_actions, model_thoughts = await run_org_agent(
7174
llm=llm,
7275
headless=headless,
7376
disable_security=disable_security,
@@ -79,7 +82,7 @@ async def run_browser_agent(
7982
use_vision=use_vision
8083
)
8184
elif agent_type == "custom":
82-
return await run_custom_agent(
85+
final_result, errors, model_actions, model_thoughts = await run_custom_agent(
8386
llm=llm,
8487
use_own_browser=use_own_browser,
8588
headless=headless,
@@ -95,6 +98,16 @@ async def run_browser_agent(
9598
else:
9699
raise ValueError(f"Invalid agent type: {agent_type}")
97100

101+
# Get the list of videos after the agent runs
102+
new_videos = set(glob.glob(os.path.join(save_recording_path, '*.[mM][pP]4')) +
103+
glob.glob(os.path.join(save_recording_path, '*.[wW][eE][bB][mM]')))
104+
105+
# Find the newly created video
106+
latest_video = None
107+
if new_videos - existing_videos:
108+
latest_video = list(new_videos - existing_videos)[0] # Get the first new video
109+
110+
return final_result, errors, model_actions, model_thoughts, latest_video
98111

99112
async def run_org_agent(
100113
llm,
@@ -137,7 +150,6 @@ async def run_org_agent(
137150
await browser.close()
138151
return final_result, errors, model_actions, model_thoughts
139152

140-
141153
async def run_custom_agent(
142154
llm,
143155
use_own_browser,
@@ -228,88 +240,228 @@ async def run_custom_agent(
228240
return final_result, errors, model_actions, model_thoughts
229241

230242

231-
def main():
232-
parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
233-
parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
234-
parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
235-
args = parser.parse_args()
243+
import argparse
244+
import gradio as gr
245+
from gradio.themes import Base, Default, Soft, Monochrome, Glass, Origin, Citrus, Ocean
246+
import os, glob
236247

237-
js_func = """
238-
function refresh() {
239-
const url = new URL(window.location);
248+
# Define the theme map globally
249+
theme_map = {
250+
"Default": Default(),
251+
"Soft": Soft(),
252+
"Monochrome": Monochrome(),
253+
"Glass": Glass(),
254+
"Origin": Origin(),
255+
"Citrus": Citrus(),
256+
"Ocean": Ocean()
257+
}
240258

241-
if (url.searchParams.get('__theme') !== 'dark') {
242-
url.searchParams.set('__theme', 'dark');
243-
window.location.href = url.href;
244-
}
245-
}
246-
"""
259+
def create_ui(theme_name="Ocean"):
260+
css = """
261+
.gradio-container {
262+
max-width: 1200px !important;
263+
margin: auto !important;
264+
padding-top: 20px !important;
265+
}
266+
.header-text {
267+
text-align: center;
268+
margin-bottom: 30px;
269+
}
270+
.theme-section {
271+
margin-bottom: 20px;
272+
padding: 15px;
273+
border-radius: 10px;
274+
}
275+
"""
247276

248-
# Gradio UI setup
249-
with gr.Blocks(title="Browser Use WebUI", theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")]),
250-
js=js_func) as demo:
251-
gr.Markdown("<center><h1>Browser Use WebUI</h1></center>")
252-
with gr.Row():
253-
agent_type = gr.Radio(["org", "custom"], label="Agent Type", value="custom")
254-
max_steps = gr.Number(label="max run steps", value=100)
255-
use_vision = gr.Checkbox(label="use vision", value=True)
277+
js = """
278+
function refresh() {
279+
const url = new URL(window.location);
280+
if (url.searchParams.get('__theme') !== 'dark') {
281+
url.searchParams.set('__theme', 'dark');
282+
window.location.href = url.href;
283+
}
284+
}
285+
"""
286+
287+
with gr.Blocks(title="Browser Use WebUI", theme=theme_map[theme_name], css=css, js=js) as demo:
256288
with gr.Row():
257-
llm_provider = gr.Dropdown(
258-
["anthropic", "openai", "gemini", "azure_openai", "deepseek", "ollama"], label="LLM Provider",
259-
value="gemini"
289+
gr.Markdown(
290+
"""
291+
# 🌐 Browser Use WebUI
292+
### Control your browser with AI assistance
293+
""",
294+
elem_classes=["header-text"]
260295
)
261-
llm_model_name = gr.Textbox(label="LLM Model Name", value="gemini-2.0-flash-exp")
262-
llm_temperature = gr.Number(label="LLM Temperature", value=1.0)
263-
with gr.Row():
264-
llm_base_url = gr.Textbox(label="LLM Base URL")
265-
llm_api_key = gr.Textbox(label="LLM API Key", type="password")
266-
267-
with gr.Accordion("Browser Settings", open=False):
268-
use_own_browser = gr.Checkbox(label="Use Own Browser", value=False)
269-
headless = gr.Checkbox(label="Headless", value=False)
270-
disable_security = gr.Checkbox(label="Disable Security", value=True)
271-
with gr.Row():
272-
window_w = gr.Number(label="Window Width", value=1920)
273-
window_h = gr.Number(label="Window Height", value=1080)
274-
save_recording_path = gr.Textbox(label="Save Recording Path", placeholder="e.g. ./tmp/record_videos",
275-
value="./tmp/record_videos")
276-
with gr.Accordion("Task Settings", open=True):
277-
task = gr.Textbox(label="Task", lines=10,
278-
value="go to google.com and type 'OpenAI' click search and give me the first url")
279-
add_infos = gr.Textbox(label="Additional Infos(Optional): Hints to help LLM complete Task", lines=5)
280-
281-
run_button = gr.Button("Run Agent", variant="primary")
282-
with gr.Column():
283-
final_result_output = gr.Textbox(label="Final Result", lines=5)
284-
errors_output = gr.Textbox(label="Errors", lines=5, )
285-
model_actions_output = gr.Textbox(label="Model Actions", lines=5)
286-
model_thoughts_output = gr.Textbox(label="Model Thoughts", lines=5)
296+
297+
with gr.Tabs() as tabs:
298+
with gr.TabItem("🤖 Agent Settings", id=1):
299+
with gr.Group():
300+
agent_type = gr.Radio(
301+
["org", "custom"],
302+
label="Agent Type",
303+
value="custom",
304+
info="Select the type of agent to use"
305+
)
306+
max_steps = gr.Slider(
307+
minimum=1,
308+
maximum=200,
309+
value=100,
310+
step=1,
311+
label="Max Run Steps",
312+
info="Maximum number of steps the agent will take"
313+
)
314+
use_vision = gr.Checkbox(
315+
label="Use Vision",
316+
value=True,
317+
info="Enable visual processing capabilities"
318+
)
319+
320+
with gr.TabItem("🔧 LLM Configuration", id=2):
321+
with gr.Group():
322+
llm_provider = gr.Dropdown(
323+
["anthropic", "openai", "gemini", "azure_openai", "deepseek", "ollama"],
324+
label="LLM Provider",
325+
value="gemini",
326+
info="Select your preferred language model provider"
327+
)
328+
llm_model_name = gr.Textbox(
329+
label="Model Name",
330+
value="gemini-2.0-flash-exp",
331+
info="Specify the model to use"
332+
)
333+
llm_temperature = gr.Slider(
334+
minimum=0.0,
335+
maximum=2.0,
336+
value=1.0,
337+
step=0.1,
338+
label="Temperature",
339+
info="Controls randomness in model outputs"
340+
)
341+
with gr.Row():
342+
llm_base_url = gr.Textbox(
343+
label="Base URL",
344+
info="API endpoint URL (if required)"
345+
)
346+
llm_api_key = gr.Textbox(
347+
label="API Key",
348+
type="password",
349+
info="Your API key"
350+
)
351+
352+
with gr.TabItem("🌐 Browser Settings", id=3):
353+
with gr.Group():
354+
with gr.Row():
355+
use_own_browser = gr.Checkbox(
356+
label="Use Own Browser",
357+
value=False,
358+
info="Use your existing browser instance"
359+
)
360+
headless = gr.Checkbox(
361+
label="Headless Mode",
362+
value=False,
363+
info="Run browser without GUI"
364+
)
365+
disable_security = gr.Checkbox(
366+
label="Disable Security",
367+
value=True,
368+
info="Disable browser security features"
369+
)
370+
371+
with gr.Row():
372+
window_w = gr.Number(
373+
label="Window Width",
374+
value=1920,
375+
info="Browser window width"
376+
)
377+
window_h = gr.Number(
378+
label="Window Height",
379+
value=1080,
380+
info="Browser window height"
381+
)
382+
383+
save_recording_path = gr.Textbox(
384+
label="Recording Path",
385+
placeholder="e.g. ./tmp/record_videos",
386+
value="./tmp/record_videos",
387+
info="Path to save browser recordings"
388+
)
389+
390+
with gr.TabItem("📝 Task Settings", id=4):
391+
task = gr.Textbox(
392+
label="Task Description",
393+
lines=4,
394+
placeholder="Enter your task here...",
395+
value="go to google.com and type 'OpenAI' click search and give me the first url",
396+
info="Describe what you want the agent to do"
397+
)
398+
add_infos = gr.Textbox(
399+
label="Additional Information",
400+
lines=3,
401+
placeholder="Add any helpful context or instructions...",
402+
info="Optional hints to help the LLM complete the task"
403+
)
404+
405+
with gr.Row():
406+
run_button = gr.Button("▶️ Run Agent", variant="primary", scale=2)
407+
stop_button = gr.Button("⏹️ Stop", variant="stop", scale=1)
408+
409+
with gr.TabItem("🎬 Recordings", id=5):
410+
recording_display = gr.Video(label="Latest Recording")
411+
412+
with gr.Group():
413+
gr.Markdown("### Results")
414+
with gr.Row():
415+
with gr.Column():
416+
final_result_output = gr.Textbox(
417+
label="Final Result",
418+
lines=3,
419+
show_label=True
420+
)
421+
with gr.Column():
422+
errors_output = gr.Textbox(
423+
label="Errors",
424+
lines=3,
425+
show_label=True
426+
)
427+
with gr.Row():
428+
with gr.Column():
429+
model_actions_output = gr.Textbox(
430+
label="Model Actions",
431+
lines=3,
432+
show_label=True
433+
)
434+
with gr.Column():
435+
model_thoughts_output = gr.Textbox(
436+
label="Model Thoughts",
437+
lines=3,
438+
show_label=True
439+
)
287440

441+
# Run button click handler
288442
run_button.click(
289443
fn=run_browser_agent,
290444
inputs=[
291-
agent_type,
292-
llm_provider,
293-
llm_model_name,
294-
llm_temperature,
295-
llm_base_url,
296-
llm_api_key,
297-
use_own_browser,
298-
headless,
299-
disable_security,
300-
window_w,
301-
window_h,
302-
save_recording_path,
303-
task,
304-
add_infos,
305-
max_steps,
306-
use_vision
445+
agent_type, llm_provider, llm_model_name, llm_temperature,
446+
llm_base_url, llm_api_key, use_own_browser, headless,
447+
disable_security, window_w, window_h, save_recording_path,
448+
task, add_infos, max_steps, use_vision
307449
],
308-
outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output],
450+
outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output, recording_display]
309451
)
310452

311-
demo.launch(server_name=args.ip, server_port=args.port)
453+
return demo
454+
455+
def main():
456+
parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
457+
parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
458+
parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
459+
parser.add_argument("--theme", type=str, default="Ocean", choices=theme_map.keys(), help="Theme to use for the UI")
460+
parser.add_argument("--dark-mode", action="store_true", help="Enable dark mode")
461+
args = parser.parse_args()
312462

463+
demo = create_ui(theme_name=args.theme)
464+
demo.launch(server_name=args.ip, server_port=args.port)
313465

314466
if __name__ == '__main__':
315467
main()

0 commit comments

Comments
 (0)