
Commit 73cc098

Merge pull request #3 from warmshao/dev

add deepseek

2 parents bab6627 + c0de4b7

File tree

6 files changed: +83 -12 lines

README.md

Lines changed: 2 additions & 1 deletion
@@ -6,7 +6,7 @@ This project builds upon the foundation of the [browser-use](https://github.com/
 
 1. **A Brand New WebUI:** We offer a comprehensive web interface that supports a wide range of `browser-use` functionalities. This UI is designed to be user-friendly and enables easy interaction with the browser agent.
 
-2. **Expanded LLM Support:** We've integrated support for various Large Language Models (LLMs), including: Gemini, OpenAI, Azure OpenAI, Anthropic etc. And we plan to add support for even more models in the future.
+2. **Expanded LLM Support:** We've integrated support for various Large Language Models (LLMs), including: Gemini, OpenAI, Azure OpenAI, Anthropic, DeepSeek etc. And we plan to add support for even more models in the future.
 
 3. **Custom Browser Support:** You can use your own browser with our tool, eliminating the need to re-login to sites or deal with other authentication challenges. This feature also supports high-definition screen recording.
 
@@ -43,5 +43,6 @@ This project builds upon the foundation of the [browser-use](https://github.com/
     ```
 2. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
 3. **Using Your Own Browser:**
+    - Close all chrome windows
     - Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.
     - Check the "Use Own Browser" option within the Browser Settings.

src/agent/custom_agent.py

Lines changed: 14 additions & 0 deletions
@@ -151,6 +151,20 @@ def update_step_info(self, model_output: CustomAgentOutput, step_info: CustomAge
         if completed_contents and 'None' not in completed_contents:
             step_info.task_progress = completed_contents
 
+    @time_execution_async('--get_next_action')
+    async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutput:
+        """Get next action from LLM based on current state"""
+
+        ret = self.llm.invoke(input_messages)
+        parsed_json = json.loads(ret.content.replace('```json', '').replace("```", ""))
+        parsed: AgentOutput = self.AgentOutput(**parsed_json)
+        # cut the number of actions to max_actions_per_step
+        parsed.action = parsed.action[: self.max_actions_per_step]
+        self._log_response(parsed)
+        self.n_steps += 1
+
+        return parsed
+
     @time_execution_async('--step')
     async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None:
         """Execute one step of the task"""

src/utils/utils.py

Lines changed: 17 additions & 0 deletions
@@ -48,6 +48,23 @@ def get_llm_model(provider: str, **kwargs):
         else:
             api_key = kwargs.get("api_key")
 
+        return ChatOpenAI(
+            model=kwargs.get("model_name", 'gpt-4o'),
+            temperature=kwargs.get("temperature", 0.0),
+            base_url=base_url,
+            api_key=api_key
+        )
+    elif provider == 'deepseek':
+        if not kwargs.get("base_url", ""):
+            base_url = os.getenv("DEEPSEEK_ENDPOINT", "")
+        else:
+            base_url = kwargs.get("base_url")
+
+        if not kwargs.get("api_key", ""):
+            api_key = os.getenv("DEEPSEEK_API_KEY", "")
+        else:
+            api_key = kwargs.get("api_key")
+
         return ChatOpenAI(
             model=kwargs.get("model_name", 'gpt-4o'),
             temperature=kwargs.get("temperature", 0.0),
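Note: the new `deepseek` branch follows the same precedence as the other providers: explicit `base_url`/`api_key` kwargs win, otherwise the `DEEPSEEK_ENDPOINT` and `DEEPSEEK_API_KEY` environment variables are read, and the OpenAI-compatible `ChatOpenAI` client is reused with that base URL. A rough usage sketch; the endpoint and key values are placeholders, not part of this commit:

```python
import os
from src.utils import utils

# Option 1: rely on the environment variables (placeholder values shown).
os.environ.setdefault("DEEPSEEK_ENDPOINT", "https://api.deepseek.com")  # placeholder URL
os.environ.setdefault("DEEPSEEK_API_KEY", "sk-...")                     # placeholder key
llm = utils.get_llm_model(provider="deepseek", model_name="deepseek-chat")

# Option 2: pass them explicitly; kwargs take precedence over the env vars.
llm = utils.get_llm_model(
    provider="deepseek",
    model_name="deepseek-chat",
    temperature=0.0,
    base_url="https://api.deepseek.com",  # placeholder
    api_key="sk-...",                     # placeholder
)
```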

tests/test_browser_use.py

Lines changed: 13 additions & 5 deletions
@@ -98,16 +98,23 @@ async def test_browser_use_custom():
     #     api_key=os.getenv("AZURE_OPENAI_API_KEY", "")
     # )
 
+    # llm = utils.get_llm_model(
+    #     provider="gemini",
+    #     model_name="gemini-2.0-flash-exp",
+    #     temperature=1.0,
+    #     api_key=os.getenv("GOOGLE_API_KEY", "")
+    # )
+
     llm = utils.get_llm_model(
-        provider="gemini",
-        model_name="gemini-2.0-flash-exp",
-        temperature=1.0,
-        api_key=os.getenv("GOOGLE_API_KEY", "")
+        provider="deepseek",
+        model_name="deepseek-chat",
+        temperature=0.8
     )
 
     controller = CustomController()
     use_own_browser = False
     disable_security = True
+    use_vision = False
     playwright = None
     browser_context_ = None
     try:
@@ -156,7 +163,8 @@ async def test_browser_use_custom():
             llm=llm,
             browser_context=browser_context,
             controller=controller,
-            system_prompt_class=CustomSystemPrompt
+            system_prompt_class=CustomSystemPrompt,
+            use_vision=use_vision
         )
         history: AgentHistoryList = await agent.run(max_steps=10)
 
tests/test_llm_api.py

Lines changed: 23 additions & 1 deletion
@@ -95,7 +95,29 @@ def test_azure_openai_model():
     print(ai_msg.content)
 
 
+def test_deepseek_model():
+    from langchain_core.messages import HumanMessage
+    from src.utils import utils
+
+    llm = utils.get_llm_model(
+        provider="deepseek",
+        model_name="deepseek-chat",
+        temperature=0.8,
+        base_url=os.getenv("DEEPSEEK_ENDPOINT", ""),
+        api_key=os.getenv("DEEPSEEK_API_KEY", "")
+    )
+    pdb.set_trace()
+    message = HumanMessage(
+        content=[
+            {"type": "text", "text": "who are you?"}
+        ]
+    )
+    ai_msg = llm.invoke([message])
+    print(ai_msg.content)
+
+
 if __name__ == '__main__':
     # test_openai_model()
-    test_gemini_model()
+    # test_gemini_model()
     # test_azure_openai_model()
+    test_deepseek_model()
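Note: the `__main__` block now runs `test_deepseek_model()` (the Gemini call is commented out), so `python tests/test_llm_api.py` exercises the DeepSeek path directly, assuming `DEEPSEEK_ENDPOINT` and `DEEPSEEK_API_KEY` are set and the repository root is on `PYTHONPATH` so that `src.utils` resolves. The `pdb.set_trace()` left in the test pauses at a `(Pdb)` prompt before the request is sent; type `c` to continue.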

webui.py

Lines changed: 14 additions & 5 deletions
@@ -52,7 +52,8 @@ async def run_browser_agent(
         save_recording_path,
         task,
         add_infos,
-        max_steps
+        max_steps,
+        use_vision
 ):
     """
     Runs the browser agent based on user configurations.
@@ -75,6 +76,7 @@ async def run_browser_agent(
             save_recording_path=save_recording_path,
             task=task,
             max_steps=max_steps,
+            use_vision=use_vision
         )
     elif agent_type == "custom":
         return await run_custom_agent(
@@ -88,6 +90,7 @@ async def run_browser_agent(
             task=task,
             add_infos=add_infos,
             max_steps=max_steps,
+            use_vision=use_vision
         )
     else:
         raise ValueError(f"Invalid agent type: {agent_type}")
@@ -101,7 +104,8 @@ async def run_org_agent(
         window_h,
         save_recording_path,
         task,
-        max_steps
+        max_steps,
+        use_vision
 ):
     browser = Browser(
         config=BrowserConfig(
@@ -121,6 +125,7 @@ async def run_org_agent(
         agent = Agent(
             task=task,
             llm=llm,
+            use_vision=use_vision,
            browser_context=browser_context,
         )
         history = await agent.run(max_steps=max_steps)
@@ -143,7 +148,8 @@ async def run_custom_agent(
         save_recording_path,
         task,
         add_infos,
-        max_steps
+        max_steps,
+        use_vision
 ):
     controller = CustomController()
     playwright = None
@@ -190,6 +196,7 @@ async def run_custom_agent(
         agent = CustomAgent(
             task=task,
            add_infos=add_infos,
+            use_vision=use_vision,
             llm=llm,
             browser_context=browser_context,
             controller=controller,
@@ -245,9 +252,10 @@ def main():
         with gr.Row():
             agent_type = gr.Radio(["org", "custom"], label="Agent Type", value="custom")
             max_steps = gr.Number(label="max run steps", value=100)
+            use_vision = gr.Checkbox(label="use vision", value=True)
         with gr.Row():
             llm_provider = gr.Dropdown(
-                ["anthropic", "openai", "gemini", "azure_openai"], label="LLM Provider", value="gemini"
+                ["anthropic", "openai", "gemini", "azure_openai", "deepseek"], label="LLM Provider", value="gemini"
            )
             llm_model_name = gr.Textbox(label="LLM Model Name", value="gemini-2.0-flash-exp")
             llm_temperature = gr.Number(label="LLM Temperature", value=1.0)
@@ -293,7 +301,8 @@ def main():
                 save_recording_path,
                 task,
                 add_infos,
-                max_steps
+                max_steps,
+                use_vision
             ],
             outputs=[final_result_output, errors_output, model_actions_output, model_thoughts_output],
         )
