Skip to content

Commit 6c07ec2

Browse files
committed
add custom browser
1 parent 08fd44b commit 6c07ec2

File tree

12 files changed

+423
-12
lines changed

12 files changed

+423
-12
lines changed

.env.example

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
1+
OPENAI_ENDPOINT=https://api.openai.com/v1
12
OPENAI_API_KEY=
3+
24
ANTHROPIC_API_KEY=
5+
36
GOOGLE_API_KEY=
7+
48
AZURE_OPENAI_ENDPOINT=
5-
AZURE_OPENAI_KEY=
9+
AZURE_OPENAI_API_KEY=
610

711
# Set to false to disable anonymized telemetry
812
ANONYMIZED_TELEMETRY=true

requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
browser-use
2+
langchain-google-genai
3+
pyperclip

src/agent/custom_agent.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
# -*- coding: utf-8 -*-
2+
# @Time : 2025/1/2
3+
# @Author : wenshao
4+
# @ProjectName: browser-use-webui
5+
# @FileName: custom_agent.py
6+
7+
import asyncio
8+
import json
9+
import logging
10+
import os
11+
import time
12+
import uuid
13+
from pathlib import Path
14+
from typing import Any, Optional, Type, TypeVar
15+
16+
from dotenv import load_dotenv
17+
from langchain_core.language_models.chat_models import BaseChatModel
18+
from langchain_core.messages import (
19+
BaseMessage,
20+
SystemMessage,
21+
)
22+
from openai import RateLimitError
23+
from pydantic import BaseModel, ValidationError
24+
25+
from browser_use.agent.message_manager.service import MessageManager
26+
from browser_use.agent.prompts import AgentMessagePrompt, SystemPrompt
27+
from browser_use.agent.views import (
28+
ActionResult,
29+
AgentError,
30+
AgentHistory,
31+
AgentHistoryList,
32+
AgentOutput,
33+
AgentStepInfo,
34+
)
35+
from browser_use.telemetry.views import (
36+
AgentEndTelemetryEvent,
37+
AgentRunTelemetryEvent,
38+
AgentStepErrorTelemetryEvent,
39+
)
40+
from browser_use.agent.service import Agent
41+
from browser_use.utils import time_execution_async
42+
43+
from .custom_views import CustomAgentOutput
44+
45+
logger = logging.getLogger(__name__)
46+
47+
48+
class CustomAgent(Agent):
    """Agent whose LLM responses are parsed into ``CustomAgentOutput``.

    Overrides the action-model setup, response logging and the per-step loop
    of the base ``Agent``.
    """

    def _setup_action_models(self) -> None:
        """Setup dynamic action models from controller's registry"""
        # Get the dynamic action model from controller's registry
        self.ActionModel = self.controller.registry.create_action_model()
        # Create output model with the dynamic actions
        self.AgentOutput = CustomAgentOutput.type_with_custom_actions(self.ActionModel)

    def _log_response(self, response: CustomAgentOutput) -> None:
        """Log the model's response.

        Only fields declared on ``CustomAgentBrain`` are read
        (``prev_action_evaluation``, ``memory``, ``progress``, ``thought``,
        ``summary``, ``action``). The base-agent names
        ``evaluation_previous_goal`` / ``next_goal`` do not exist on that
        model and would raise ``AttributeError``.

        Args:
            response: Parsed model output for the current step.
        """
        brain = response.current_state
        evaluation = brain.prev_action_evaluation

        if 'Success' in evaluation:
            emoji = '👍'
        elif 'Failed' in evaluation:
            emoji = '⚠'
        else:
            emoji = '🤷'

        logger.info(f'{emoji} Eval: {evaluation}')
        logger.info(f'🧠 Memory: {brain.memory}')
        logger.info(f'📈 Progress: {brain.progress}')
        logger.info(f'🤔 Thought: {brain.thought}')
        logger.info(f'🎯 Summary: {brain.summary}')
        logger.info(f'▶️ Planned action: {brain.action}')

        total = len(response.action)
        for i, action in enumerate(response.action, start=1):
            logger.info(
                f'🛠️ Action {i}/{total}: {action.model_dump_json(exclude_unset=True)}'
            )

    @time_execution_async('--step')
    async def step(self, step_info: Optional[AgentStepInfo] = None) -> None:
        """Execute one step of the task.

        Fetches the browser state, asks the model for the next action(s),
        executes them through the controller and records the outcome.
        Exceptions derived from ``Exception`` are converted into results by
        ``_handle_step_error``; anything else (e.g. cancellation) propagates
        to the caller.

        Args:
            step_info: Optional step bookkeeping forwarded to the message
                manager when the state message is added.
        """
        logger.info(f'\n📍 Step {self.n_steps}')
        state = None
        model_output = None
        result: list[ActionResult] = []

        try:
            state = await self.browser_context.get_state(use_vision=self.use_vision)
            self.message_manager.add_state_message(state, self._last_result, step_info)
            input_messages = self.message_manager.get_messages()
            model_output = await self.get_next_action(input_messages)
            self._save_conversation(input_messages, model_output)
            # we dont want the whole state in the chat history
            self.message_manager._remove_last_state_message()
            self.message_manager.add_model_output(model_output)

            result = await self.controller.multi_act(
                model_output.action, self.browser_context
            )
            self._last_result = result

            if result and result[-1].is_done:
                logger.info(f'📄 Result: {result[-1].extracted_content}')

            self.consecutive_failures = 0

        except Exception as e:
            result = self._handle_step_error(e)
            self._last_result = result

        finally:
            # Deliberately no `return` in this clause: returning from
            # `finally` discards an in-flight exception, which would silently
            # swallow CancelledError / KeyboardInterrupt raised mid-step.
            if result:
                for r in result:
                    if r.error:
                        self.telemetry.capture(
                            AgentStepErrorTelemetryEvent(
                                agent_id=self.agent_id,
                                error=r.error,
                            )
                        )
                # A history item needs a browser state; skip it when the
                # state fetch itself failed.
                if state:
                    self._make_history_item(model_output, state, result)

src/agent/custom_prompts.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# -*- coding: utf-8 -*-
2+
# @Time : 2025/1/2
3+
# @Author : wenshao
4+
# @ProjectName: browser-use-webui
5+
# @FileName: custom_prompts.py
6+
7+
from datetime import datetime
8+
from typing import List, Optional
9+
10+
from langchain_core.messages import HumanMessage, SystemMessage
11+
12+
from browser_use.agent.views import ActionResult, AgentStepInfo
13+
from browser_use.browser.views import BrowserState
14+
from browser_use.agent.prompts import SystemPrompt
15+
16+
class CustomSystemPrompt(SystemPrompt):
    """Placeholder subclass of ``SystemPrompt``; currently adds no overrides."""

src/agent/custom_views.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# -*- coding: utf-8 -*-
2+
# @Time : 2025/1/2
3+
# @Author : wenshao
4+
# @ProjectName: browser-use-webui
5+
# @FileName: custom_views.py
6+
7+
from dataclasses import dataclass
8+
from typing import Type
9+
from pydantic import BaseModel, ConfigDict, Field, ValidationError, create_model
10+
from browser_use.controller.registry.views import ActionModel
11+
12+
13+
@dataclass
class CustomAgentStepInfo:
    """Per-step bookkeeping record: two integer counters and a memory string."""

    # Positional order (step_number, max_steps, memory) is part of the
    # generated __init__ signature, so the fields must stay in this order.
    step_number: int
    max_steps: int
    memory: str
18+
19+
20+
21+
class CustomAgentBrain(BaseModel):
    """Current state of the agent"""

    # NOTE: the docstring above is left untouched on purpose; pydantic
    # publishes a model's docstring as its JSON-schema description.
    #
    # All six fields are required strings (none declares a default).
    prev_action_evaluation: str
    memory: str
    progress: str
    thought: str
    summary: str
    action: str
30+
31+
32+
class CustomAgentOutput(BaseModel):
    """Output model for agent

    @dev note: this model is extended with custom actions in AgentService. You can also use some fields that are not in this model as provided by the linter, as long as they are registered in the DynamicActions model.
    """

    # The docstring above is reproduced verbatim: pydantic publishes it as the
    # model's JSON-schema description.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    current_state: CustomAgentBrain
    action: list[ActionModel]

    @staticmethod
    def type_with_custom_actions(custom_actions: Type[ActionModel]) -> Type['CustomAgentOutput']:
        """Build a subclass whose ``action`` field is typed with ``custom_actions``.

        Args:
            custom_actions: Action model class to use as the list element type.

        Returns:
            A model class named ``'AgentOutput'`` derived from
            ``CustomAgentOutput``.
        """
        # Required field (Ellipsis default) holding a list of the given action type.
        action_field = (list[custom_actions], Field(...))
        return create_model(
            'AgentOutput',
            __base__=CustomAgentOutput,
            __module__=CustomAgentOutput.__module__,
            action=action_field,
        )

src/browser/custom_browser.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# -*- coding: utf-8 -*-
2+
# @Time : 2025/1/2
3+
# @Author : wenshao
4+
# @ProjectName: browser-use-webui
5+
# @FileName: custom_browser.py
6+
7+
from browser_use.browser.browser import Browser, BrowserConfig
8+
from browser_use.browser.context import BrowserContextConfig, BrowserContext
9+
10+
from .custom_context import CustomBrowserContext
11+
12+
13+
class CustomBrowser(Browser):
    """Browser whose contexts are ``CustomBrowserContext`` instances."""

    async def new_context(
        self, config: 'BrowserContextConfig | None' = None
    ) -> BrowserContext:
        """Create a browser context.

        Args:
            config: Context configuration. When omitted (or ``None``) a fresh
                ``BrowserContextConfig()`` is built for this call.

        Returns:
            A ``CustomBrowserContext`` bound to this browser.
        """
        # A default instance written in the signature is evaluated once at
        # definition time and would be shared by every context created
        # without an explicit config; build it per call instead.
        if config is None:
            config = BrowserContextConfig()
        return CustomBrowserContext(config=config, browser=self)

src/browser/context.py renamed to src/browser/custom_context.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,6 @@
1919

2020

2121
class CustomBrowserContext(BrowserContext):
22-
"""
23-
定制BrowserContext
24-
"""
25-
26-
def __init__(self,
27-
browser: 'Browser',
28-
config: BrowserContextConfig = BrowserContextConfig(),
29-
):
30-
super(CustomBrowserContext, self).__init__(browser, config)
3122

3223
async def _create_context(self, browser: PlaywrightBrowser):
3324
"""Creates a new browser context with anti-detection measures and loads cookies if available."""

src/controller/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# -*- coding: utf-8 -*-
2+
# @Time : 2025/1/2
3+
# @Author : wenshao
4+
# @ProjectName: browser-use-webui
5+
# @FileName: __init__.py
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# -*- coding: utf-8 -*-
2+
# @Time : 2025/1/2
3+
# @Author : wenshao
4+
# @ProjectName: browser-use-webui
5+
# @FileName: custom_action.py
6+
7+
import pyperclip
8+
9+
from browser_use.controller.service import Controller
10+
from browser_use.agent.views import ActionResult
11+
from browser_use.browser.context import BrowserContext
12+
13+
14+
class CustomController(Controller):
    """Controller that additionally registers clipboard copy/paste actions."""

    def __init__(self):
        super().__init__()
        self._register_custom_actions()

    def _register_custom_actions(self):
        """Register all custom browser actions"""
        register = self.registry.action

        # Writes the given text to the system clipboard and echoes it back
        # as the action's extracted content.
        @register('Copy text to clipboard')
        def copy_to_clipboard(text: str):
            pyperclip.copy(text)
            return ActionResult(extracted_content=text)

        # Reads the system clipboard and types its content into the current
        # page through the page's keyboard.
        @register('Paste text from clipboard', requires_browser=True)
        async def paste_from_clipboard(browser: BrowserContext):
            clipboard_text = pyperclip.paste()
            # send text to browser
            current_page = await browser.get_current_page()
            await current_page.keyboard.type(clipboard_text)

            return ActionResult(extracted_content=clipboard_text)

src/utils/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def get_llm_model(provider: str, **kwargs):
1818
:param kwargs:
1919
:return:
2020
"""
21-
if provider == 'claude':
21+
if provider == 'anthropic':
2222
return ChatAnthropic(
2323
model_name=kwargs.get("model_name", 'claude-3-5-sonnet-20240620'),
2424
temperature=kwargs.get("temperature", 0.0),

0 commit comments

Comments
 (0)