Skip to content

Commit 0c9cb9b

Browse files
authored
Merge pull request #184 from vvincent1234/fix/adapt_latest_browser-use
Fix/adapt latest browser use
2 parents 566bca7 + 75ab505 commit 0c9cb9b

File tree

10 files changed

+115
-186
lines changed

10 files changed

+115
-186
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ playwright install
108108
- `--dark-mode`: Enables dark mode for the user interface.
109109
3. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
110110
4. **Using Your Own Browser(Optional):**
111-
- Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser.
111+
- Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser. Leave `CHROME_USER_DATA` empty if you want to use local user data.
112112
- Windows
113113
```env
114114
CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe"
@@ -118,7 +118,7 @@ playwright install
118118
- Mac
119119
```env
120120
CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
121-
CHROME_USER_DATA="~/Library/Application Support/Google/Chrome/Profile 1"
121+
CHROME_USER_DATA="/Users/YourUsername/Library/Application Support/Google/Chrome"
122122
```
123123
- Close all Chrome windows
124124
- Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
browser-use==0.1.29
22
pyperclip==1.9.0
33
gradio==5.10.0
4+
json-repair

src/agent/custom_agent.py

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@
88
import base64
99
import io
1010
import platform
11-
from browser_use.agent.prompts import SystemPrompt
11+
from browser_use.agent.prompts import SystemPrompt, AgentMessagePrompt
1212
from browser_use.agent.service import Agent
1313
from browser_use.agent.views import (
1414
ActionResult,
15+
ActionModel,
1516
AgentHistoryList,
1617
AgentOutput,
1718
AgentHistory,
@@ -30,6 +31,7 @@
3031
from langchain_core.messages import (
3132
BaseMessage,
3233
)
34+
from json_repair import repair_json
3335
from src.utils.agent_state import AgentState
3436

3537
from .custom_massage_manager import CustomMassageManager
@@ -52,6 +54,7 @@ def __init__(
5254
max_failures: int = 5,
5355
retry_delay: int = 10,
5456
system_prompt_class: Type[SystemPrompt] = SystemPrompt,
57+
agent_prompt_class: Type[AgentMessagePrompt] = AgentMessagePrompt,
5558
max_input_tokens: int = 128000,
5659
validate_output: bool = False,
5760
include_attributes: list[str] = [
@@ -98,28 +101,31 @@ def __init__(
98101
register_done_callback=register_done_callback,
99102
tool_calling_method=tool_calling_method
100103
)
101-
if self.model_name in ["deepseek-reasoner"] or self.model_name.startswith("deepseek-r1"):
104+
if self.model_name in ["deepseek-reasoner"] or "deepseek-r1" in self.model_name:
102105
# deepseek-reasoner does not support function calling
103106
self.use_deepseek_r1 = True
104107
        # deepseek-reasoner only supports a 64000-token context window
105108
self.max_input_tokens = 64000
106109
else:
107110
self.use_deepseek_r1 = False
108111

112+
# record last actions
113+
self._last_actions = None
109114
# custom new info
110115
self.add_infos = add_infos
111116
# agent_state for Stop
112117
self.agent_state = agent_state
118+
self.agent_prompt_class = agent_prompt_class
113119
self.message_manager = CustomMassageManager(
114120
llm=self.llm,
115121
task=self.task,
116122
action_descriptions=self.controller.registry.get_prompt_description(),
117123
system_prompt_class=self.system_prompt_class,
124+
agent_prompt_class=agent_prompt_class,
118125
max_input_tokens=self.max_input_tokens,
119126
include_attributes=self.include_attributes,
120127
max_error_length=self.max_error_length,
121-
max_actions_per_step=self.max_actions_per_step,
122-
use_deepseek_r1=self.use_deepseek_r1
128+
max_actions_per_step=self.max_actions_per_step
123129
)
124130

125131
def _setup_action_models(self) -> None:
@@ -186,9 +192,11 @@ async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutpu
186192
logger.info(ai_message.reasoning_content)
187193
logger.info(f"🤯 End Deep Thinking")
188194
if isinstance(ai_message.content, list):
189-
parsed_json = json.loads(ai_message.content[0].replace("```json", "").replace("```", ""))
195+
ai_content = ai_message.content[0].replace("```json", "").replace("```", "")
190196
else:
191-
parsed_json = json.loads(ai_message.content.replace("```json", "").replace("```", ""))
197+
ai_content = ai_message.content.replace("```json", "").replace("```", "")
198+
ai_content = repair_json(ai_content)
199+
parsed_json = json.loads(ai_content)
192200
parsed: AgentOutput = self.AgentOutput(**parsed_json)
193201
if parsed is None:
194202
logger.debug(ai_message.content)
@@ -197,9 +205,11 @@ async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutpu
197205
ai_message = self.llm.invoke(input_messages)
198206
self.message_manager._add_message_with_tokens(ai_message)
199207
if isinstance(ai_message.content, list):
200-
parsed_json = json.loads(ai_message.content[0].replace("```json", "").replace("```", ""))
208+
ai_content = ai_message.content[0].replace("```json", "").replace("```", "")
201209
else:
202-
parsed_json = json.loads(ai_message.content.replace("```json", "").replace("```", ""))
210+
ai_content = ai_message.content.replace("```json", "").replace("```", "")
211+
ai_content = repair_json(ai_content)
212+
parsed_json = json.loads(ai_content)
203213
parsed: AgentOutput = self.AgentOutput(**parsed_json)
204214
if parsed is None:
205215
logger.debug(ai_message.content)
@@ -222,7 +232,7 @@ async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None:
222232

223233
try:
224234
state = await self.browser_context.get_state(use_vision=self.use_vision)
225-
self.message_manager.add_state_message(state, self._last_result, step_info)
235+
self.message_manager.add_state_message(state, self._last_actions, self._last_result, step_info)
226236
input_messages = self.message_manager.get_messages()
227237
try:
228238
model_output = await self.get_next_action(input_messages)
@@ -231,27 +241,31 @@ async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None:
231241
self.update_step_info(model_output, step_info)
232242
logger.info(f"🧠 All Memory: \n{step_info.memory}")
233243
self._save_conversation(input_messages, model_output)
234-
# should we remove last state message? at least, deepseek-reasoner cannot remove
235244
if self.model_name != "deepseek-reasoner":
236-
self.message_manager._remove_last_state_message()
245+
# remove prev message
246+
self.message_manager._remove_state_message_by_index(-1)
237247
except Exception as e:
238248
# model call failed, remove last state message from history
239-
self.message_manager._remove_last_state_message()
249+
self.message_manager._remove_state_message_by_index(-1)
240250
raise e
241251

252+
actions: list[ActionModel] = model_output.action
242253
result: list[ActionResult] = await self.controller.multi_act(
243-
model_output.action, self.browser_context
254+
actions, self.browser_context
244255
)
245-
if len(result) != len(model_output.action):
256+
if len(result) != len(actions):
246257
            # The page state likely changed mid-sequence; the LLM should be told which actions did not run
247-
for ri in range(len(result), len(model_output.action)):
258+
for ri in range(len(result), len(actions)):
248259
result.append(ActionResult(extracted_content=None,
249260
include_in_memory=True,
250-
error=f"{model_output.action[ri].model_dump_json(exclude_unset=True)} is Failed to execute. \
251-
Something new appeared after action {model_output.action[len(result) - 1].model_dump_json(exclude_unset=True)}",
261+
error=f"{actions[ri].model_dump_json(exclude_unset=True)} is Failed to execute. \
262+
Something new appeared after action {actions[len(result) - 1].model_dump_json(exclude_unset=True)}",
252263
is_done=False))
264+
if len(actions) == 0:
265+
# TODO: fix no action case
266+
result = [ActionResult(is_done=True, extracted_content=step_info.memory, include_in_memory=True)]
253267
self._last_result = result
254-
268+
self._last_actions = actions
255269
if len(result) > 0 and result[-1].is_done:
256270
logger.info(f"📄 Result: {result[-1].extracted_content}")
257271

src/agent/custom_massage_manager.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55

66
from browser_use.agent.message_manager.service import MessageManager
77
from browser_use.agent.message_manager.views import MessageHistory
8-
from browser_use.agent.prompts import SystemPrompt
9-
from browser_use.agent.views import ActionResult, AgentStepInfo
8+
from browser_use.agent.prompts import SystemPrompt, AgentMessagePrompt
9+
from browser_use.agent.views import ActionResult, AgentStepInfo, ActionModel
1010
from browser_use.browser.views import BrowserState
1111
from langchain_core.language_models import BaseChatModel
1212
from langchain_anthropic import ChatAnthropic
@@ -31,14 +31,14 @@ def __init__(
3131
task: str,
3232
action_descriptions: str,
3333
system_prompt_class: Type[SystemPrompt],
34+
agent_prompt_class: Type[AgentMessagePrompt],
3435
max_input_tokens: int = 128000,
3536
estimated_characters_per_token: int = 3,
3637
image_tokens: int = 800,
3738
include_attributes: list[str] = [],
3839
max_error_length: int = 400,
3940
max_actions_per_step: int = 10,
40-
message_context: Optional[str] = None,
41-
use_deepseek_r1: bool = False
41+
message_context: Optional[str] = None
4242
):
4343
super().__init__(
4444
llm=llm,
@@ -53,8 +53,7 @@ def __init__(
5353
max_actions_per_step=max_actions_per_step,
5454
message_context=message_context
5555
)
56-
self.tool_id = 1
57-
self.use_deepseek_r1 = use_deepseek_r1
56+
self.agent_prompt_class = agent_prompt_class
5857
# Custom: Move Task info to state_message
5958
self.history = MessageHistory()
6059
self._add_message_with_tokens(self.system_prompt)
@@ -75,13 +74,15 @@ def cut_messages(self):
7574
def add_state_message(
7675
self,
7776
state: BrowserState,
77+
actions: Optional[List[ActionModel]] = None,
7878
result: Optional[List[ActionResult]] = None,
7979
step_info: Optional[AgentStepInfo] = None,
8080
) -> None:
8181
"""Add browser state as human message"""
8282
# otherwise add state message and result to next message (which will not stay in memory)
83-
state_message = CustomAgentMessagePrompt(
83+
state_message = self.agent_prompt_class(
8484
state,
85+
actions,
8586
result,
8687
include_attributes=self.include_attributes,
8788
max_error_length=self.max_error_length,
@@ -102,3 +103,15 @@ def _count_text_tokens(self, text: str) -> int:
102103
len(text) // self.estimated_characters_per_token
103104
) # Rough estimate if no tokenizer available
104105
return tokens
106+
107+
def _remove_state_message_by_index(self, remove_ind=-1) -> None:
108+
"""Remove last state message from history"""
109+
i = len(self.history.messages) - 1
110+
remove_cnt = 0
111+
while i >= 0:
112+
if isinstance(self.history.messages[i].message, HumanMessage):
113+
remove_cnt += 1
114+
if remove_cnt == abs(remove_ind):
115+
self.history.remove_message(i)
116+
break
117+
i -= 1

src/agent/custom_prompts.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from typing import List, Optional
33

44
from browser_use.agent.prompts import SystemPrompt, AgentMessagePrompt
5-
from browser_use.agent.views import ActionResult
5+
from browser_use.agent.views import ActionResult, ActionModel
66
from browser_use.browser.views import BrowserState
77
from langchain_core.messages import HumanMessage, SystemMessage
88

@@ -56,7 +56,7 @@ def important_rules(self) -> str:
5656
- Use scroll to find elements you are looking for
5757
5858
5. TASK COMPLETION:
59-
- If you think all the requirements of user\'s instruction have been completed and no further operation is required, output the done action to terminate the operation process.
59+
- If you think all the requirements of user\'s instruction have been completed and no further operation is required, output the **Done** action to terminate the operation process.
6060
- Don't hallucinate actions.
6161
- If the task requires specific information - make sure to include everything in the done function. This is what the user will see.
6262
- If you are running out of steps (current step), think about speeding it up, and ALWAYS use the done action as the last action.
@@ -140,6 +140,7 @@ class CustomAgentMessagePrompt(AgentMessagePrompt):
140140
def __init__(
141141
self,
142142
state: BrowserState,
143+
actions: Optional[List[ActionModel]] = None,
143144
result: Optional[List[ActionResult]] = None,
144145
include_attributes: list[str] = [],
145146
max_error_length: int = 400,
@@ -151,10 +152,11 @@ def __init__(
151152
max_error_length=max_error_length,
152153
step_info=step_info
153154
)
155+
self.actions = actions
154156

155157
def get_user_message(self) -> HumanMessage:
156158
if self.step_info:
157-
step_info_description = f'Current step: {self.step_info.step_number + 1}/{self.step_info.max_steps}'
159+
step_info_description = f'Current step: {self.step_info.step_number}/{self.step_info.max_steps}\n'
158160
else:
159161
step_info_description = ''
160162

@@ -181,7 +183,7 @@ def get_user_message(self) -> HumanMessage:
181183

182184
state_description = f"""
183185
{step_info_description}
184-
1. Task: {self.step_info.task}
186+
1. Task: {self.step_info.task}.
185187
2. Hints(Optional):
186188
{self.step_info.add_infos}
187189
3. Memory:
@@ -193,17 +195,20 @@ def get_user_message(self) -> HumanMessage:
193195
{elements_text}
194196
"""
195197

196-
if self.result:
197-
198+
if self.actions and self.result:
199+
state_description += "\n **Previous Actions** \n"
200+
state_description += f'Previous step: {self.step_info.step_number-1}/{self.step_info.max_steps} \n'
198201
for i, result in enumerate(self.result):
202+
action = self.actions[i]
203+
state_description += f"Previous action {i + 1}/{len(self.result)}: {action.model_dump_json(exclude_unset=True)}\n"
199204
if result.include_in_memory:
200205
if result.extracted_content:
201-
state_description += f"\nResult of previous action {i + 1}/{len(self.result)}: {result.extracted_content}"
206+
state_description += f"Result of previous action {i + 1}/{len(self.result)}: {result.extracted_content}\n"
202207
if result.error:
203208
# only use last 300 characters of error
204209
error = result.error[-self.max_error_length:]
205210
state_description += (
206-
f"\nError of previous action {i + 1}/{len(self.result)}: ...{error}"
211+
f"Error of previous action {i + 1}/{len(self.result)}: ...{error}\n"
207212
)
208213

209214
if self.state.screenshot:

src/controller/custom_controller.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from pydantic import BaseModel
44
from browser_use.agent.views import ActionResult
55
from browser_use.browser.context import BrowserContext
6-
from browser_use.controller.service import Controller
6+
from browser_use.controller.service import Controller, DoneAction
77

88

99
class CustomController(Controller):

src/utils/utils.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,9 @@ def get_llm_model(provider: str, **kwargs):
9494
else:
9595
base_url = kwargs.get("base_url")
9696

97-
if kwargs.get("model_name", "qwen2.5:7b").startswith("deepseek-r1"):
97+
if "deepseek-r1" in kwargs.get("model_name", "qwen2.5:7b"):
9898
return DeepSeekR1ChatOllama(
99-
model=kwargs.get("model_name", "deepseek-r1:7b"),
99+
model=kwargs.get("model_name", "deepseek-r1:14b"),
100100
temperature=kwargs.get("temperature", 0.0),
101101
num_ctx=kwargs.get("num_ctx", 32000),
102102
base_url=kwargs.get("base_url", base_url),
@@ -106,6 +106,7 @@ def get_llm_model(provider: str, **kwargs):
106106
model=kwargs.get("model_name", "qwen2.5:7b"),
107107
temperature=kwargs.get("temperature", 0.0),
108108
num_ctx=kwargs.get("num_ctx", 32000),
109+
num_predict=kwargs.get("num_predict", 1024),
109110
base_url=kwargs.get("base_url", base_url),
110111
)
111112
elif provider == "azure_openai":

0 commit comments

Comments
 (0)