Commit 6ceaf8d

adapt to browser-use==0.1.29

1 parent 8a09c96, commit 6ceaf8d

13 files changed: +407 -466 lines

requirements.txt

Lines changed: 2 additions & 5 deletions
@@ -1,6 +1,3 @@
-browser-use==0.1.19
-langchain-google-genai==2.0.8
+browser-use==0.1.29
 pyperclip==1.9.0
-gradio==5.9.1
-langchain-ollama==0.2.2
-langchain-openai==0.2.14
+gradio==5.10.0
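After upgrading with pip install -r requirements.txt, it may be worth confirming the environment actually matches the new pins. A minimal check; the package names and versions come from this diff, everything else is illustrative:

# Sketch: verify installed versions match the pins in this commit.
# Assumes the packages were installed with `pip install -r requirements.txt`.
from importlib.metadata import version

EXPECTED = {
    "browser-use": "0.1.29",
    "pyperclip": "1.9.0",
    "gradio": "5.10.0",
}

for package, expected in EXPECTED.items():
    installed = version(package)
    assert installed == expected, f"{package}: expected {expected}, got {installed}"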

src/agent/custom_agent.py

Lines changed: 194 additions & 192 deletions
Large diffs are not rendered by default.

src/agent/custom_massage_manager.py

Lines changed: 17 additions & 41 deletions
@@ -15,6 +15,7 @@
     AIMessage,
     BaseMessage,
     HumanMessage,
+    ToolMessage
 )
 from langchain_openai import ChatOpenAI
 from ..utils.llm import DeepSeekR1ChatOpenAI
@@ -31,69 +32,44 @@ def __init__(
             action_descriptions: str,
             system_prompt_class: Type[SystemPrompt],
             max_input_tokens: int = 128000,
-            estimated_tokens_per_character: int = 3,
+            estimated_characters_per_token: int = 3,
             image_tokens: int = 800,
             include_attributes: list[str] = [],
             max_error_length: int = 400,
             max_actions_per_step: int = 10,
-            tool_call_in_content: bool = False,
-            use_function_calling: bool = True
+            message_context: Optional[str] = None,
+            use_deepseek_r1: bool = False
     ):
         super().__init__(
             llm=llm,
             task=task,
             action_descriptions=action_descriptions,
             system_prompt_class=system_prompt_class,
             max_input_tokens=max_input_tokens,
-            estimated_tokens_per_character=estimated_tokens_per_character,
+            estimated_characters_per_token=estimated_characters_per_token,
            image_tokens=image_tokens,
             include_attributes=include_attributes,
             max_error_length=max_error_length,
             max_actions_per_step=max_actions_per_step,
-            tool_call_in_content=tool_call_in_content,
+            message_context=message_context
         )
-        self.use_function_calling = use_function_calling
+        self.tool_id = 1
+        self.use_deepseek_r1 = use_deepseek_r1
         # Custom: Move Task info to state_message
         self.history = MessageHistory()
         self._add_message_with_tokens(self.system_prompt)
 
-        if self.use_function_calling:
-            tool_calls = [
-                {
-                    'name': 'CustomAgentOutput',
-                    'args': {
-                        'current_state': {
-                            'prev_action_evaluation': 'Unknown - No previous actions to evaluate.',
-                            'important_contents': '',
-                            'completed_contents': '',
-                            'thought': 'Now Google is open. Need to type OpenAI to search.',
-                            'summary': 'Type OpenAI to search.',
-                        },
-                        'action': [],
-                    },
-                    'id': '',
-                    'type': 'tool_call',
-                }
-            ]
-            if self.tool_call_in_content:
-                # openai throws error if tool_calls are not responded -> move to content
-                example_tool_call = AIMessage(
-                    content=f'{tool_calls}',
-                    tool_calls=[],
-                )
-            else:
-                example_tool_call = AIMessage(
-                    content=f'',
-                    tool_calls=tool_calls,
-                )
-
-            self._add_message_with_tokens(example_tool_call)
+        if self.message_context:
+            context_message = HumanMessage(content=self.message_context)
+            self._add_message_with_tokens(context_message)
 
     def cut_messages(self):
         """Get current message list, potentially trimmed to max tokens"""
         diff = self.history.total_tokens - self.max_input_tokens
-        while diff > 0 and len(self.history.messages) > 1:
-            self.history.remove_message(1)  # alway remove the oldest one
+        min_message_len = 2 if self.message_context is not None else 1
+
+        while diff > 0 and len(self.history.messages) > min_message_len:
+            self.history.remove_message(min_message_len)  # alway remove the oldest message
             diff = self.history.total_tokens - self.max_input_tokens
 
     def add_state_message(
@@ -119,10 +95,10 @@ def _count_text_tokens(self, text: str) -> int:
                 tokens = self.llm.get_num_tokens(text)
             except Exception:
                 tokens = (
-                    len(text) // self.ESTIMATED_TOKENS_PER_CHARACTER
+                    len(text) // self.estimated_characters_per_token
                 )  # Rough estimate if no tokenizer available
         else:
             tokens = (
-                len(text) // self.ESTIMATED_TOKENS_PER_CHARACTER
+                len(text) // self.estimated_characters_per_token
            )  # Rough estimate if no tokenizer available
        return tokens
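Two behaviours in this file are easy to restate outside the class: token counts fall back to a character-based estimate (len(text) // estimated_characters_per_token) when no tokenizer is usable, and cut_messages now protects the optional context message at index 1 in addition to the system prompt at index 0. A standalone sketch of that trimming policy, using hypothetical Message/MessageHistory stand-ins rather than the browser-use classes:

from dataclasses import dataclass, field


def estimate_tokens(text: str, estimated_characters_per_token: int = 3) -> int:
    # Rough estimate used when no tokenizer is available.
    return len(text) // estimated_characters_per_token


@dataclass
class Message:
    text: str
    tokens: int


@dataclass
class MessageHistory:
    messages: list[Message] = field(default_factory=list)

    @property
    def total_tokens(self) -> int:
        return sum(m.tokens for m in self.messages)


def cut_messages(history: MessageHistory, max_input_tokens: int, has_context_message: bool) -> None:
    # Keep the system prompt (index 0) and, if present, the context message (index 1);
    # drop the oldest removable message until the history fits the token budget.
    min_message_len = 2 if has_context_message else 1
    while history.total_tokens > max_input_tokens and len(history.messages) > min_message_len:
        history.messages.pop(min_message_len)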

src/agent/custom_prompts.py

Lines changed: 37 additions & 17 deletions
@@ -1,7 +1,7 @@
 import pdb
 from typing import List, Optional
 
-from browser_use.agent.prompts import SystemPrompt
+from browser_use.agent.prompts import SystemPrompt, AgentMessagePrompt
 from browser_use.agent.views import ActionResult
 from browser_use.browser.views import BrowserState
 from langchain_core.messages import HumanMessage, SystemMessage
@@ -19,7 +19,7 @@ def important_rules(self) -> str:
    {
      "current_state": {
        "prev_action_evaluation": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Ignore the action result. The website is the ground truth. Also mention if something unexpected happened like new suggestions in an input field. Shortly state why/why not. Note that the result you output must be consistent with the reasoning you output afterwards. If you consider it to be 'Failed,' you should reflect on this during your thought.",
-        "important_contents": "Output important contents closely related to user\'s instruction or task on the current page. If there is, please output the contents. If not, please output empty string ''.",
+        "important_contents": "Output important contents closely related to user\'s instruction on the current page. If there is, please output the contents. If not, please output empty string ''.",
        "task_progress": "Task Progress is a general summary of the current contents that have been completed. Just summarize the contents that have been actually completed based on the content at current step and the history operations. Please list each completed item individually, such as: 1. Input username. 2. Input Password. 3. Click confirm button. Please return string type not a list.",
        "future_plans": "Based on the user's request and the current state, outline the remaining steps needed to complete the task. This should be a concise list of actions yet to be performed, such as: 1. Select a date. 2. Choose a specific time slot. 3. Confirm booking. Please return string type not a list.",
        "thought": "Think about the requirements that have been completed in previous operations and the requirements that need to be completed in the next one operation. If your output of prev_action_evaluation is 'Failed', please reflect and output your reflection here.",
@@ -142,7 +142,7 @@ def get_system_message(self) -> SystemMessage:
         return SystemMessage(content=AGENT_PROMPT)
 
 
-class CustomAgentMessagePrompt:
+class CustomAgentMessagePrompt(AgentMessagePrompt):
     def __init__(
             self,
             state: BrowserState,
@@ -151,11 +151,12 @@ def __init__(
             max_error_length: int = 400,
             step_info: Optional[CustomAgentStepInfo] = None,
     ):
-        self.state = state
-        self.result = result
-        self.max_error_length = max_error_length
-        self.include_attributes = include_attributes
-        self.step_info = step_info
+        super(CustomAgentMessagePrompt, self).__init__(state=state,
+                                                       result=result,
+                                                       include_attributes=include_attributes,
+                                                       max_error_length=max_error_length,
+                                                       step_info=step_info
+                                                       )
 
     def get_user_message(self) -> HumanMessage:
         if self.step_info:
@@ -164,8 +165,26 @@ def get_user_message(self) -> HumanMessage:
             step_info_description = ''
 
         elements_text = self.state.element_tree.clickable_elements_to_string(include_attributes=self.include_attributes)
-        if not elements_text:
+
+        has_content_above = (self.state.pixels_above or 0) > 0
+        has_content_below = (self.state.pixels_below or 0) > 0
+
+        if elements_text != '':
+            if has_content_above:
+                elements_text = (
+                    f'... {self.state.pixels_above} pixels above - scroll or extract content to see more ...\n{elements_text}'
+                )
+            else:
+                elements_text = f'[Start of page]\n{elements_text}'
+            if has_content_below:
+                elements_text = (
+                    f'{elements_text}\n... {self.state.pixels_below} pixels below - scroll or extract content to see more ...'
+                )
+            else:
+                elements_text = f'{elements_text}\n[End of page]'
+        else:
             elements_text = 'empty page'
+
         state_description = f"""
 {step_info_description}
 1. Task: {self.step_info.task}
@@ -182,14 +201,15 @@ def get_user_message(self) -> HumanMessage:
 
         if self.result:
             for i, result in enumerate(self.result):
-                if result.extracted_content:
-                    state_description += f"\nResult of action {i + 1}/{len(self.result)}: {result.extracted_content}"
-                if result.error:
-                    # only use last 300 characters of error
-                    error = result.error[-self.max_error_length:]
-                    state_description += (
-                        f"\nError of action {i + 1}/{len(self.result)}: ...{error}"
-                    )
+                if result.include_in_memory:
+                    if result.extracted_content:
+                        state_description += f"\nResult of action {i + 1}/{len(self.result)}: {result.extracted_content}"
+                    if result.error:
+                        # only use last 300 characters of error
+                        error = result.error[-self.max_error_length:]
+                        state_description += (
+                            f"\nError of action {i + 1}/{len(self.result)}: ...{error}"
+                        )
 
         if self.state.screenshot:
             # Format message for vision model
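The new branch above wraps the clickable-element dump with scroll context, so the model can tell whether content remains above or below the current viewport. The same formatting step in isolation, with plain integers standing in for the BrowserState fields (a sketch, not the class method itself):

def annotate_elements_text(elements_text: str, pixels_above: int = 0, pixels_below: int = 0) -> str:
    # Mirrors the pixels_above / pixels_below handling added to get_user_message.
    if elements_text == '':
        return 'empty page'
    if pixels_above > 0:
        elements_text = f'... {pixels_above} pixels above - scroll or extract content to see more ...\n{elements_text}'
    else:
        elements_text = f'[Start of page]\n{elements_text}'
    if pixels_below > 0:
        elements_text = f'{elements_text}\n... {pixels_below} pixels below - scroll or extract content to see more ...'
    else:
        elements_text = f'{elements_text}\n[End of page]'
    return elements_text


print(annotate_elements_text('1[:]<button>Search</button>', pixels_above=0, pixels_below=1200))
# [Start of page]
# 1[:]<button>Search</button>
# ... 1200 pixels below - scroll or extract content to see more ...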

src/agent/custom_views.py

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@ def type_with_custom_actions(
     ) -> Type["CustomAgentOutput"]:
         """Extend actions with custom actions"""
         return create_model(
-            "AgentOutput",
+            "CustomAgentOutput",
             __base__=CustomAgentOutput,
             action=(
                 list[custom_actions],
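The only change here is the first argument to pydantic's create_model, so the dynamically generated subclass now reports its __name__ as CustomAgentOutput rather than AgentOutput. A toy version of the same pattern, with made-up Base and ClickAction models standing in for the real agent output and action schemas:

from pydantic import BaseModel, create_model


class ClickAction(BaseModel):
    index: int


class Base(BaseModel):
    current_state: str = ""


# The first argument becomes the generated class's __name__;
# `...` marks the `action` field as required.
CustomOutput = create_model(
    "CustomAgentOutput",
    __base__=Base,
    action=(list[ClickAction], ...),
)

print(CustomOutput.__name__)  # CustomAgentOutput
print(CustomOutput(action=[ClickAction(index=3)]).action[0].index)  # 3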

src/browser/config.py

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+# @Time : 2025/1/6
+# @Author : wenshao
+# @ProjectName: browser-use-webui
+# @FileName: config.py
+
+import os
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass
+class BrowserPersistenceConfig:
+    """Configuration for browser persistence"""
+
+    persistent_session: bool = False
+    user_data_dir: Optional[str] = None
+    debugging_port: Optional[int] = None
+    debugging_host: Optional[str] = None
+
+    @classmethod
+    def from_env(cls) -> "BrowserPersistenceConfig":
+        """Create config from environment variables"""
+        return cls(
+            persistent_session=os.getenv("CHROME_PERSISTENT_SESSION", "").lower()
+            == "true",
+            user_data_dir=os.getenv("CHROME_USER_DATA"),
+            debugging_port=int(os.getenv("CHROME_DEBUGGING_PORT", "9222")),
+            debugging_host=os.getenv("CHROME_DEBUGGING_HOST", "localhost"),
+        )
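Because from_env only reads environment variables, enabling a persistent browser session is just a matter of exporting the CHROME_* variables before the UI starts. A small usage sketch (the profile path is a placeholder):

import os

from src.browser.config import BrowserPersistenceConfig  # path as added in this commit

os.environ["CHROME_PERSISTENT_SESSION"] = "true"
os.environ["CHROME_USER_DATA"] = "/tmp/chrome-profile"  # placeholder path
os.environ["CHROME_DEBUGGING_PORT"] = "9222"

config = BrowserPersistenceConfig.from_env()
print(config.persistent_session, config.debugging_port, config.debugging_host)
# True 9222 localhost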
