Skip to content

Commit 08fd44b

Browse files
committed
Add Gemini support
0 parents  commit 08fd44b

File tree

11 files changed

+446
-0
lines changed

11 files changed

+446
-0
lines changed

.env.example

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
OPENAI_API_KEY=
2+
ANTHROPIC_API_KEY=
3+
GOOGLE_API_KEY=
4+
AZURE_OPENAI_ENDPOINT=
5+
AZURE_OPENAI_KEY=
6+
7+
# Set to false to disable anonymized telemetry
8+
ANONYMIZED_TELEMETRY=true
9+
10+
# LogLevel: Set to debug to enable verbose logging, set to result to get results only. Available: result | debug | info
11+
BROWSER_USE_LOGGING_LEVEL=info

.gitignore

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py,cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
.pybuilder/
76+
target/
77+
78+
# Jupyter Notebook
79+
.ipynb_checkpoints
80+
81+
# IPython
82+
profile_default/
83+
ipython_config.py
84+
85+
# pyenv
86+
# For a library or package, you might want to ignore these files since the code is
87+
# intended to run in multiple environments; otherwise, check them in:
88+
# .python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
93+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# poetry
98+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99+
# This is especially recommended for binary packages to ensure reproducibility, and is more
100+
# commonly ignored for libraries.
101+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102+
#poetry.lock
103+
104+
# pdm
105+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106+
#pdm.lock
107+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108+
# in version control.
109+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110+
.pdm.toml
111+
.pdm-python
112+
.pdm-build/
113+
114+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115+
__pypackages__/
116+
117+
# Celery stuff
118+
celerybeat-schedule
119+
celerybeat.pid
120+
121+
# SageMath parsed files
122+
*.sage.py
123+
124+
# Environments
125+
.env
126+
.venv
127+
env/
128+
venv/
129+
ENV/
130+
env.bak/
131+
venv.bak/
132+
test_env/
133+
134+
135+
# Spyder project settings
136+
.spyderproject
137+
.spyproject
138+
139+
# Rope project settings
140+
.ropeproject
141+
142+
# mkdocs documentation
143+
/site
144+
145+
# mypy
146+
.mypy_cache/
147+
.dmypy.json
148+
dmypy.json
149+
150+
# Pyre type checker
151+
.pyre/
152+
153+
# pytype static type analyzer
154+
.pytype/
155+
156+
# Cython debug symbols
157+
cython_debug/
158+
159+
# PyCharm
160+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
161+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
162+
# and can be added to the global gitignore or merged into this file. For a more nuclear
163+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
164+
.idea/
165+
temp
166+
tmp
167+
168+
169+
.DS_Store
170+
171+
private_example.py
172+
private_example
173+
174+
browser_cookies.json
175+
cookies.json
176+
AgentHistory.json
177+
cv_04_24.pdf
178+
AgentHistoryList.json
179+
*.gif

assets/examples/test.png

413 KB
Loading

src/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# -*- coding: utf-8 -*-
2+
# @Time : 2025/1/1
3+
# @Author : wenshao
4+
5+
# @Project : browser-use-webui
6+
# @FileName: __init__.py

src/agent/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# -*- coding: utf-8 -*-
2+
# @Time : 2025/1/1
3+
# @Author : wenshao
4+
5+
# @Project : browser-use-webui
6+
# @FileName: __init__.py

src/browser/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# -*- coding: utf-8 -*-
2+
# @Time : 2025/1/1
3+
# @Author : wenshao
4+
5+
# @Project : browser-use-webui
6+
# @FileName: __init__.py

src/browser/context.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# -*- coding: utf-8 -*-
2+
# @Time : 2025/1/1
3+
# @Author : wenshao
4+
5+
# @Project : browser-use-webui
6+
# @FileName: context.py
7+
8+
import asyncio
9+
import base64
10+
import json
11+
import logging
12+
import os
13+
14+
from playwright.async_api import Browser as PlaywrightBrowser
15+
from browser_use.browser.context import BrowserContext, BrowserContextConfig
16+
from browser_use.browser.browser import Browser
17+
18+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
19+
20+
21+
class CustomBrowserContext(BrowserContext):
    """
    Customized BrowserContext.

    Defines its own ``_create_context`` that either reuses the first context
    of an already-running Chrome instance or builds a new Playwright context,
    then optionally starts tracing, loads cookies from a file, and installs
    an anti-detection init script.
    """

    def __init__(
        self,
        browser: 'Browser',
        config: 'BrowserContextConfig | None' = None,
    ):
        """
        :param browser: browser wrapper forwarded to ``BrowserContext.__init__``.
        :param config: context configuration; when omitted, a fresh
            ``BrowserContextConfig()`` is created for this instance.
        """
        # Build the default here rather than in the signature: a default
        # evaluated at definition time would be a single object shared by
        # every instance created without an explicit config.
        if config is None:
            config = BrowserContextConfig()
        super().__init__(browser, config)

    async def _create_context(self, browser: PlaywrightBrowser):
        """Creates a new browser context with anti-detection measures and loads cookies if available.

        :param browser: Playwright browser to take or create the context from.
        :return: the Playwright context, with cookies and init script applied.
        """
        # NOTE(review): self.browser and self.config are presumably set by
        # BrowserContext.__init__ - confirm against the base class.
        if self.browser.config.chrome_instance_path and len(browser.contexts) > 0:
            # Connect to existing Chrome instance instead of creating new one
            context = browser.contexts[0]
        else:
            # Original code for creating new context
            context = await browser.new_context(
                viewport=self.config.browser_window_size,
                no_viewport=False,
                user_agent=(
                    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                    '(KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
                ),
                java_script_enabled=True,
                bypass_csp=self.config.disable_security,
                ignore_https_errors=self.config.disable_security,
                record_video_dir=self.config.save_recording_path,
                record_video_size=self.config.browser_window_size  # set record video size
            )

        if self.config.trace_path:
            await context.tracing.start(screenshots=True, snapshots=True, sources=True)

        # Load cookies if they exist
        if self.config.cookies_file and os.path.exists(self.config.cookies_file):
            # Read as UTF-8 explicitly so parsing does not depend on the
            # platform's locale encoding.
            with open(self.config.cookies_file, 'r', encoding='utf-8') as f:
                cookies = json.load(f)
            logger.info(f'Loaded {len(cookies)} cookies from {self.config.cookies_file}')
            await context.add_cookies(cookies)

        # Expose anti-detection scripts
        await context.add_init_script(
            """
            // Webdriver property
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined
            });

            // Languages
            Object.defineProperty(navigator, 'languages', {
                get: () => ['en-US', 'en']
            });

            // Plugins
            Object.defineProperty(navigator, 'plugins', {
                get: () => [1, 2, 3, 4, 5]
            });

            // Chrome runtime
            window.chrome = { runtime: {} };

            // Permissions
            const originalQuery = window.navigator.permissions.query;
            window.navigator.permissions.query = (parameters) => (
                parameters.name === 'notifications' ?
                    Promise.resolve({ state: Notification.permission }) :
                    originalQuery(parameters)
            );
            """
        )

        return context

src/utils/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# -*- coding: utf-8 -*-
2+
# @Time : 2025/1/1
3+
# @Author : wenshao
4+
5+
# @Project : browser-use-webui
6+
# @FileName: __init__.py

src/utils/utils.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# -*- coding: utf-8 -*-
2+
# @Time : 2025/1/1
3+
# @Author : wenshao
4+
5+
# @Project : browser-use-webui
6+
# @FileName: utils.py
7+
8+
import base64
9+
from langchain_openai import ChatOpenAI, AzureChatOpenAI
10+
from langchain_anthropic import ChatAnthropic
11+
from langchain_google_genai import ChatGoogleGenerativeAI
12+
13+
14+
def get_llm_model(provider: str, **kwargs):
    """
    Build the chat model object for the requested LLM provider.

    :param provider: provider key; one of 'claude', 'openai', 'gemini',
        'azure_openai'.
    :param kwargs: optional settings read with ``kwargs.get``:
        ``model_name`` (per-provider default), ``temperature`` (default 0.0),
        ``base_url`` (not used for 'gemini'; used as ``azure_endpoint`` for
        'azure_openai'), ``api_key`` (default None) and ``api_version``
        ('azure_openai' only, default "2024-05-01-preview").
    :return: the constructed chat model instance.
    :raises ValueError: if ``provider`` is not one of the supported keys.
    """
    # Settings shared by every provider branch.
    temperature = kwargs.get("temperature", 0.0)
    api_key = kwargs.get("api_key", None)

    if provider == 'claude':
        return ChatAnthropic(
            model_name=kwargs.get("model_name", 'claude-3-5-sonnet-20240620'),
            temperature=temperature,
            base_url=kwargs.get("base_url", "https://api.anthropic.com"),
            api_key=api_key
        )
    elif provider == 'openai':
        return ChatOpenAI(
            model=kwargs.get("model_name", 'gpt-4o'),
            temperature=temperature,
            base_url=kwargs.get("base_url", "https://api.openai.com/v1/"),
            api_key=api_key
        )
    elif provider == 'gemini':
        return ChatGoogleGenerativeAI(
            model=kwargs.get("model_name", 'gemini-2.0-flash-exp'),
            temperature=temperature,
            google_api_key=api_key,
        )
    elif provider == "azure_openai":
        return AzureChatOpenAI(
            model=kwargs.get("model_name", 'gpt-4o'),
            temperature=temperature,
            # Overridable per call; the default keeps the previously
            # hard-coded API version.
            api_version=kwargs.get("api_version", "2024-05-01-preview"),
            azure_endpoint=kwargs.get("base_url", ""),
            api_key=api_key
        )
    else:
        raise ValueError(f'Unsupported provider: {provider}')
51+
52+
53+
def encode_image(img_path):
    """
    Read the file at ``img_path`` and return its bytes as a base64 string.

    :param img_path: path of the file to encode; may be None or empty.
    :return: base64 text decoded as UTF-8, or None when ``img_path`` is falsy.
    """
    if img_path:
        with open(img_path, "rb") as image_file:
            raw_bytes = image_file.read()
        return base64.b64encode(raw_bytes).decode("utf-8")
    return None

0 commit comments

Comments
 (0)