From a6cd8aed628994fcd3345fbb0740d03163ddcdd6 Mon Sep 17 00:00:00 2001 From: dennigi Date: Tue, 17 Jun 2025 17:08:20 +0200 Subject: [PATCH] upgrade to bu==0.2.7 --- requirements.txt | 2 +- src/agent/browser_use/browser_use_agent.py | 21 +-- .../deep_research/deep_research_agent.py | 68 +++------ src/browser/__init__.py | 0 src/browser/custom_browser.py | 109 ------------- src/browser/custom_context.py | 22 --- src/controller/custom_controller.py | 18 ++- src/webui/components/browser_settings_tab.py | 15 +- src/webui/components/browser_use_agent_tab.py | 113 ++++++-------- src/webui/webui_manager.py | 10 +- tests/test_agents.py | 144 +++++++----------- 11 files changed, 138 insertions(+), 384 deletions(-) delete mode 100644 src/browser/__init__.py delete mode 100644 src/browser/custom_browser.py delete mode 100644 src/browser/custom_context.py diff --git a/requirements.txt b/requirements.txt index f7055242..db97ad4c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -browser-use==0.1.48 +browser-use==0.2.7 pyperclip==1.9.0 gradio==5.27.0 json-repair diff --git a/src/agent/browser_use/browser_use_agent.py b/src/agent/browser_use/browser_use_agent.py index f7f6107b..cae1f3f9 100644 --- a/src/agent/browser_use/browser_use_agent.py +++ b/src/agent/browser_use/browser_use_agent.py @@ -140,25 +140,6 @@ async def run( finally: # Unregister signal handlers before cleanup signal_handler.unregister() - - if self.settings.save_playwright_script_path: - logger.info( - f'Agent run finished. Attempting to save Playwright script to: {self.settings.save_playwright_script_path}' - ) - try: - # Extract sensitive data keys if sensitive_data is provided - keys = list(self.sensitive_data.keys()) if self.sensitive_data else None - # Pass browser and context config to the saving method - self.state.history.save_as_playwright_script( - self.settings.save_playwright_script_path, - sensitive_data_keys=keys, - browser_config=self.browser.config, - context_config=self.browser_context.config, - ) - except Exception as script_gen_err: - # Log any error during script generation/saving - logger.error(f'Failed to save Playwright script: {script_gen_err}', exc_info=True) - await self.close() if self.settings.generate_gif: @@ -166,4 +147,4 @@ async def run( if isinstance(self.settings.generate_gif, str): output_path = self.settings.generate_gif - create_history_gif(task=self.task, history=self.state.history, output_path=output_path) + create_history_gif(task=self.task, history=self.state.history, output_path=output_path) \ No newline at end of file diff --git a/src/agent/deep_research/deep_research_agent.py b/src/agent/deep_research/deep_research_agent.py index 86be3016..6e2efb48 100644 --- a/src/agent/deep_research/deep_research_agent.py +++ b/src/agent/deep_research/deep_research_agent.py @@ -7,7 +7,8 @@ from pathlib import Path from typing import Any, Dict, List, Optional, TypedDict -from browser_use.browser.browser import BrowserConfig +from browser_use.browser import BrowserProfile, BrowserSession +from browser_use.browser.profile import ViewportSize from langchain_community.tools.file_management import ( ListDirectoryTool, ReadFileTool, @@ -29,10 +30,7 @@ from langgraph.graph import StateGraph from pydantic import BaseModel, Field -from browser_use.browser.context import BrowserContextConfig - from src.agent.browser_use.browser_use_agent import BrowserUseAgent -from src.browser.custom_browser import CustomBrowser from src.controller.custom_controller import CustomController from src.utils.mcp_client import setup_mcp_client_and_tools @@ -77,42 +75,30 @@ async def run_single_browser_task( cdp_url = browser_config.get("cdp_url", None) disable_security = browser_config.get("disable_security", False) - bu_browser = None - bu_browser_context = None + bu_browser_session = None try: + browser_user_data = None logger.info(f"Starting browser task for query: {task_query}") - extra_args = [] if use_own_browser: browser_binary_path = os.getenv("BROWSER_PATH", None) or browser_binary_path if browser_binary_path == "": browser_binary_path = None browser_user_data = browser_user_data_dir or os.getenv("BROWSER_USER_DATA", None) - if browser_user_data: - extra_args += [f"--user-data-dir={browser_user_data}"] else: browser_binary_path = None - - bu_browser = CustomBrowser( - config=BrowserConfig( - headless=headless, - browser_binary_path=browser_binary_path, - extra_browser_args=extra_args, - wss_url=wss_url, - cdp_url=cdp_url, - new_context_config=BrowserContextConfig( - window_width=window_w, - window_height=window_h, - ) - ) + browser_profile = BrowserProfile( + headless=headless, + executable_path=browser_binary_path, + user_data_dir=browser_user_data, + window_size=ViewportSize(width=window_w, height=window_h), + traces_dir=None, + record_video_dir=None, + downloads_path="./tmp/downloads" ) - - context_config = BrowserContextConfig( - save_downloads_path="./tmp/downloads", - window_height=window_h, - window_width=window_w, - force_new_context=True, - ) - bu_browser_context = await bu_browser.new_context(config=context_config) + bu_browser_session = BrowserSession( + browser_profile=browser_profile, + wss_url=wss_url, + cdp_url=cdp_url) # Simple controller example, replace with your actual implementation if needed bu_controller = CustomController() @@ -133,8 +119,7 @@ async def run_single_browser_task( bu_agent_instance = BrowserUseAgent( task=bu_task_prompt, llm=llm, # Use the passed LLM - browser=bu_browser, - browser_context=bu_browser_context, + browser_session=bu_browser_session, controller=bu_controller, use_vision=use_vision, source="webui", @@ -174,20 +159,13 @@ async def run_single_browser_task( ) return {"query": task_query, "error": str(e), "status": "failed"} finally: - if bu_browser_context: - try: - await bu_browser_context.close() - bu_browser_context = None - logger.info("Closed browser context.") - except Exception as e: - logger.error(f"Error closing browser context: {e}") - if bu_browser: + if bu_browser_session: try: - await bu_browser.close() - bu_browser = None - logger.info("Closed browser.") + await bu_browser_session.kill() + bu_browser_session = None + logger.info("Closed browser session.") except Exception as e: - logger.error(f"Error closing browser: {e}") + logger.error(f"Error closing browser session: {e}") if task_key in _BROWSER_AGENT_INSTANCES: del _BROWSER_AGENT_INSTANCES[task_key] @@ -1258,4 +1236,4 @@ async def stop(self): await self._stop_lingering_browsers(self.current_task_id) def close(self): - self.stopped = False + self.stopped = False \ No newline at end of file diff --git a/src/browser/__init__.py b/src/browser/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/browser/custom_browser.py b/src/browser/custom_browser.py deleted file mode 100644 index 1556959d..00000000 --- a/src/browser/custom_browser.py +++ /dev/null @@ -1,109 +0,0 @@ -import asyncio -import pdb - -from playwright.async_api import Browser as PlaywrightBrowser -from playwright.async_api import ( - BrowserContext as PlaywrightBrowserContext, -) -from playwright.async_api import ( - Playwright, - async_playwright, -) -from browser_use.browser.browser import Browser, IN_DOCKER -from browser_use.browser.context import BrowserContext, BrowserContextConfig -from playwright.async_api import BrowserContext as PlaywrightBrowserContext -import logging - -from browser_use.browser.chrome import ( - CHROME_ARGS, - CHROME_DETERMINISTIC_RENDERING_ARGS, - CHROME_DISABLE_SECURITY_ARGS, - CHROME_DOCKER_ARGS, - CHROME_HEADLESS_ARGS, -) -from browser_use.browser.context import BrowserContext, BrowserContextConfig -from browser_use.browser.utils.screen_resolution import get_screen_resolution, get_window_adjustments -from browser_use.utils import time_execution_async -import socket - -from .custom_context import CustomBrowserContext - -logger = logging.getLogger(__name__) - - -class CustomBrowser(Browser): - - async def new_context(self, config: BrowserContextConfig | None = None) -> CustomBrowserContext: - """Create a browser context""" - browser_config = self.config.model_dump() if self.config else {} - context_config = config.model_dump() if config else {} - merged_config = {**browser_config, **context_config} - return CustomBrowserContext(config=BrowserContextConfig(**merged_config), browser=self) - - async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrowser: - """Sets up and returns a Playwright Browser instance with anti-detection measures.""" - assert self.config.browser_binary_path is None, 'browser_binary_path should be None if trying to use the builtin browsers' - - # Use the configured window size from new_context_config if available - if ( - not self.config.headless - and hasattr(self.config, 'new_context_config') - and hasattr(self.config.new_context_config, 'window_width') - and hasattr(self.config.new_context_config, 'window_height') - ): - screen_size = { - 'width': self.config.new_context_config.window_width, - 'height': self.config.new_context_config.window_height, - } - offset_x, offset_y = get_window_adjustments() - elif self.config.headless: - screen_size = {'width': 1920, 'height': 1080} - offset_x, offset_y = 0, 0 - else: - screen_size = get_screen_resolution() - offset_x, offset_y = get_window_adjustments() - - chrome_args = { - f'--remote-debugging-port={self.config.chrome_remote_debugging_port}', - *CHROME_ARGS, - *(CHROME_DOCKER_ARGS if IN_DOCKER else []), - *(CHROME_HEADLESS_ARGS if self.config.headless else []), - *(CHROME_DISABLE_SECURITY_ARGS if self.config.disable_security else []), - *(CHROME_DETERMINISTIC_RENDERING_ARGS if self.config.deterministic_rendering else []), - f'--window-position={offset_x},{offset_y}', - f'--window-size={screen_size["width"]},{screen_size["height"]}', - *self.config.extra_browser_args, - } - - # check if chrome remote debugging port is already taken, - # if so remove the remote-debugging-port arg to prevent conflicts - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - if s.connect_ex(('localhost', self.config.chrome_remote_debugging_port)) == 0: - chrome_args.remove(f'--remote-debugging-port={self.config.chrome_remote_debugging_port}') - - browser_class = getattr(playwright, self.config.browser_class) - args = { - 'chromium': list(chrome_args), - 'firefox': [ - *{ - '-no-remote', - *self.config.extra_browser_args, - } - ], - 'webkit': [ - *{ - '--no-startup-window', - *self.config.extra_browser_args, - } - ], - } - - browser = await browser_class.launch( - channel='chromium', # https://github.com/microsoft/playwright/issues/33566 - headless=self.config.headless, - args=args[self.config.browser_class], - proxy=self.config.proxy.model_dump() if self.config.proxy else None, - handle_sigterm=False, - handle_sigint=False, - ) - return browser diff --git a/src/browser/custom_context.py b/src/browser/custom_context.py deleted file mode 100644 index 674191af..00000000 --- a/src/browser/custom_context.py +++ /dev/null @@ -1,22 +0,0 @@ -import json -import logging -import os - -from browser_use.browser.browser import Browser, IN_DOCKER -from browser_use.browser.context import BrowserContext, BrowserContextConfig -from playwright.async_api import Browser as PlaywrightBrowser -from playwright.async_api import BrowserContext as PlaywrightBrowserContext -from typing import Optional -from browser_use.browser.context import BrowserContextState - -logger = logging.getLogger(__name__) - - -class CustomBrowserContext(BrowserContext): - def __init__( - self, - browser: 'Browser', - config: BrowserContextConfig | None = None, - state: Optional[BrowserContextState] = None, - ): - super(CustomBrowserContext, self).__init__(browser=browser, config=config, state=state) diff --git a/src/controller/custom_controller.py b/src/controller/custom_controller.py index 00e050c5..a689b76e 100644 --- a/src/controller/custom_controller.py +++ b/src/controller/custom_controller.py @@ -2,9 +2,11 @@ import pyperclip from typing import Optional, Type, Callable, Dict, Any, Union, Awaitable, TypeVar + +from browser_use import BrowserSession from pydantic import BaseModel from browser_use.agent.views import ActionResult -from browser_use.browser.context import BrowserContext +from browser_use import BrowserSession from browser_use.controller.service import Controller, DoneAction from browser_use.controller.registry.service import Registry, RegisteredAction from main_content_extractor import MainContentExtractor @@ -39,8 +41,8 @@ class CustomController(Controller): def __init__(self, exclude_actions: list[str] = [], output_model: Optional[Type[BaseModel]] = None, - ask_assistant_callback: Optional[Union[Callable[[str, BrowserContext], Dict[str, Any]], Callable[ - [str, BrowserContext], Awaitable[Dict[str, Any]]]]] = None, + ask_assistant_callback: Optional[Union[Callable[[str, BrowserSession], Dict[str, Any]], Callable[ + [str, BrowserSession], Awaitable[Dict[str, Any]]]]] = None, ): super().__init__(exclude_actions=exclude_actions, output_model=output_model) self._register_custom_actions() @@ -57,7 +59,7 @@ def _register_custom_actions(self): "requiring subjective human judgment, needing a physical action performed, encountering complex CAPTCHAs, " "or facing limitations in your capabilities – you must request human assistance." ) - async def ask_for_assistant(query: str, browser: BrowserContext): + async def ask_for_assistant(query: str, browser: BrowserSession): if self.ask_assistant_callback: if inspect.iscoroutinefunction(self.ask_assistant_callback): user_response = await self.ask_assistant_callback(query, browser) @@ -73,7 +75,7 @@ async def ask_for_assistant(query: str, browser: BrowserContext): @self.registry.action( 'Upload file to interactive element with file path ', ) - async def upload_file(index: int, path: str, browser: BrowserContext, available_file_paths: list[str]): + async def upload_file(index: int, path: str, browser: BrowserSession, available_file_paths: list[str]): if path not in available_file_paths: return ActionResult(error=f'File path {path} is not available') @@ -110,7 +112,7 @@ async def upload_file(index: int, path: str, browser: BrowserContext, available_ async def act( self, action: ActionModel, - browser_context: Optional[BrowserContext] = None, + browser_session: Optional[BrowserSession] = None, # page_extraction_llm: Optional[BaseChatModel] = None, sensitive_data: Optional[Dict[str, str]] = None, @@ -132,7 +134,7 @@ async def act( result = await self.registry.execute_action( action_name, params, - browser=browser_context, + browser_session=browser_session, page_extraction_llm=page_extraction_llm, sensitive_data=sensitive_data, available_file_paths=available_file_paths, @@ -179,4 +181,4 @@ def register_mcp_tools(self): async def close_mcp_client(self): if self.mcp_client: - await self.mcp_client.__aexit__(None, None, None) + await self.mcp_client.__aexit__(None, None, None) \ No newline at end of file diff --git a/src/webui/components/browser_settings_tab.py b/src/webui/components/browser_settings_tab.py index 77fbfb52..c265cd54 100644 --- a/src/webui/components/browser_settings_tab.py +++ b/src/webui/components/browser_settings_tab.py @@ -17,15 +17,10 @@ async def close_browser(webui_manager: WebuiManager): webui_manager.bu_current_task.cancel() webui_manager.bu_current_task = None - if webui_manager.bu_browser_context: - logger.info("⚠️ Closing browser context when changing browser config.") - await webui_manager.bu_browser_context.close() - webui_manager.bu_browser_context = None - - if webui_manager.bu_browser: - logger.info("⚠️ Closing browser when changing browser config.") - await webui_manager.bu_browser.close() - webui_manager.bu_browser = None + if webui_manager.bu_browser_session: + logger.info("⚠️ Closing browser session when changing browser config.") + await webui_manager.bu_browser_session.kill() + webui_manager.bu_browser_session = None def create_browser_settings_tab(webui_manager: WebuiManager): """ @@ -158,4 +153,4 @@ async def close_wrapper(): headless.change(close_wrapper) keep_browser_open.change(close_wrapper) disable_security.change(close_wrapper) - use_own_browser.change(close_wrapper) + use_own_browser.change(close_wrapper) \ No newline at end of file diff --git a/src/webui/components/browser_use_agent_tab.py b/src/webui/components/browser_use_agent_tab.py index a488e70d..6c2986a2 100644 --- a/src/webui/components/browser_use_agent_tab.py +++ b/src/webui/components/browser_use_agent_tab.py @@ -6,20 +6,19 @@ from typing import Any, AsyncGenerator, Dict, Optional import gradio as gr +from browser_use import BrowserSession,BrowserProfile # from browser_use.agent.service import Agent from browser_use.agent.views import ( AgentHistoryList, AgentOutput, ) -from browser_use.browser.browser import BrowserConfig -from browser_use.browser.context import BrowserContext, BrowserContextConfig -from browser_use.browser.views import BrowserState +from browser_use.browser.profile import ViewportSize +from browser_use.browser.views import BrowserStateSummary from gradio.components import Component from langchain_core.language_models.chat_models import BaseChatModel from src.agent.browser_use.browser_use_agent import BrowserUseAgent -from src.browser.custom_browser import CustomBrowser from src.controller.custom_controller import CustomController from src.utils import llm_provider from src.webui.webui_manager import WebuiManager @@ -132,7 +131,7 @@ def _format_agent_output(model_output: AgentOutput) -> str: async def _handle_new_step( - webui_manager: WebuiManager, state: BrowserState, output: AgentOutput, step_num: int + webui_manager: WebuiManager, state: BrowserStateSummary, output: AgentOutput, step_num: int ): """Callback for each step taken by the agent, including screenshot display.""" @@ -222,7 +221,7 @@ def _handle_done(webui_manager: WebuiManager, history: AgentHistoryList): async def _ask_assistant_callback( - webui_manager: WebuiManager, query: str, browser_context: BrowserContext + webui_manager: WebuiManager, query: str, browser_session: BrowserSession ) -> Dict[str, Any]: """Callback triggered by the agent's ask_for_assistant action.""" logger.info("Agent requires assistance. Waiting for user input.") @@ -422,9 +421,9 @@ def get_browser_setting(key, default=None): # Pass the webui_manager instance to the callback when wrapping it async def ask_callback_wrapper( - query: str, browser_context: BrowserContext + query: str, browser_session: BrowserSession ) -> Dict[str, Any]: - return await _ask_assistant_callback(webui_manager, query, browser_context) + return await _ask_assistant_callback(webui_manager, query, browser_session) if not webui_manager.bu_controller: webui_manager.bu_controller = CustomController( @@ -438,61 +437,41 @@ async def ask_callback_wrapper( try: # Close existing resources if not keeping open if not keep_browser_open: - if webui_manager.bu_browser_context: - logger.info("Closing previous browser context.") - await webui_manager.bu_browser_context.close() - webui_manager.bu_browser_context = None - if webui_manager.bu_browser: - logger.info("Closing previous browser.") - await webui_manager.bu_browser.close() - webui_manager.bu_browser = None - - # Create Browser if needed - if not webui_manager.bu_browser: - logger.info("Launching new browser instance.") - extra_args = [] + if webui_manager.bu_browser_session: + logger.info("Closing previous browser session.") + await webui_manager.bu_browser_session.kill() + webui_manager.bu_browser_session = None + + # Create Browser Session if needed + if not webui_manager.bu_browser_session: + logger.info("Launching new browser session.") + browser_user_data = None if use_own_browser: browser_binary_path = os.getenv("BROWSER_PATH", None) or browser_binary_path if browser_binary_path == "": browser_binary_path = None browser_user_data = browser_user_data_dir or os.getenv("BROWSER_USER_DATA", None) - if browser_user_data: - extra_args += [f"--user-data-dir={browser_user_data}"] else: browser_binary_path = None - - webui_manager.bu_browser = CustomBrowser( - config=BrowserConfig( - headless=headless, - disable_security=disable_security, - browser_binary_path=browser_binary_path, - extra_browser_args=extra_args, - wss_url=wss_url, - cdp_url=cdp_url, - new_context_config=BrowserContextConfig( - window_width=window_w, - window_height=window_h, - ) - ) - ) - - # Create Context if needed - if not webui_manager.bu_browser_context: - logger.info("Creating new browser context.") - context_config = BrowserContextConfig( - trace_path=save_trace_path if save_trace_path else None, - save_recording_path=save_recording_path + browser_profile = BrowserProfile( + keep_alive=keep_browser_open, + headless=headless, + disable_security=disable_security, + executable_path=browser_binary_path, + user_data_dir=browser_user_data, + window_size=ViewportSize(width=window_w, height=window_h), + traces_dir=save_trace_path if save_trace_path else None, + record_video_dir=save_recording_path if save_recording_path else None, - save_downloads_path=save_download_path if save_download_path else None, - window_height=window_h, - window_width=window_w, + downloads_path=save_download_path if save_download_path else None, ) - if not webui_manager.bu_browser: - raise ValueError("Browser not initialized, cannot create context.") - webui_manager.bu_browser_context = ( - await webui_manager.bu_browser.new_context(config=context_config) + webui_manager.bu_browser_session = BrowserSession( + browser_profile=browser_profile, + wss_url=wss_url, + cdp_url=cdp_url, ) + await webui_manager.bu_browser_session.start() # --- 5. Initialize or Update Agent --- webui_manager.bu_agent_task_id = str(uuid.uuid4()) # New ID for this task run @@ -513,7 +492,7 @@ async def ask_callback_wrapper( # Pass the webui_manager to callbacks when wrapping them async def step_callback_wrapper( - state: BrowserState, output: AgentOutput, step_num: int + state: BrowserStateSummary, output: AgentOutput, step_num: int ): await _handle_new_step(webui_manager, state, output, step_num) @@ -522,15 +501,14 @@ def done_callback_wrapper(history: AgentHistoryList): if not webui_manager.bu_agent: logger.info(f"Initializing new agent for task: {task}") - if not webui_manager.bu_browser or not webui_manager.bu_browser_context: + if not webui_manager.bu_browser_session: raise ValueError( - "Browser or Context not initialized, cannot create agent." + "Browser Session not initialized, cannot create agent." ) webui_manager.bu_agent = BrowserUseAgent( task=task, llm=main_llm, - browser=webui_manager.bu_browser, - browser_context=webui_manager.bu_browser_context, + browser_session=webui_manager.bu_browser_session, controller=webui_manager.bu_controller, register_new_step_callback=step_callback_wrapper, register_done_callback=done_callback_wrapper, @@ -550,8 +528,7 @@ def done_callback_wrapper(history: AgentHistoryList): webui_manager.bu_agent.state.agent_id = webui_manager.bu_agent_task_id webui_manager.bu_agent.add_new_task(task) webui_manager.bu_agent.settings.generate_gif = gif_path - webui_manager.bu_agent.browser = webui_manager.bu_browser - webui_manager.bu_agent.browser_context = webui_manager.bu_browser_context + webui_manager.bu_agent.browser_session = webui_manager.bu_browser_session webui_manager.bu_agent.controller = webui_manager.bu_controller # --- 6. Run Agent Task and Stream Updates --- @@ -660,10 +637,10 @@ def done_callback_wrapper(history: AgentHistoryList): last_chat_len = len(webui_manager.bu_chat_history) # Update Browser View - if headless and webui_manager.bu_browser_context: + if headless and webui_manager.bu_browser_session: try: screenshot_b64 = ( - await webui_manager.bu_browser_context.take_screenshot() + await webui_manager.bu_browser_session.take_screenshot() ) if screenshot_b64: html_content = f'' @@ -745,14 +722,10 @@ def done_callback_wrapper(history: AgentHistoryList): # Close browser/context if requested if should_close_browser_on_finish: - if webui_manager.bu_browser_context: - logger.info("Closing browser context after task.") - await webui_manager.bu_browser_context.close() - webui_manager.bu_browser_context = None - if webui_manager.bu_browser: - logger.info("Closing browser after task.") - await webui_manager.bu_browser.close() - webui_manager.bu_browser = None + if webui_manager.bu_browser_session: + logger.info("Closing browser session after task.") + await webui_manager.bu_browser_session.kill() + webui_manager.bu_browser_session = None # --- 8. Final UI Update --- final_update.update( @@ -1080,4 +1053,4 @@ async def clear_wrapper() -> AsyncGenerator[Dict[Component, Any], None]: pause_resume_button.click( fn=pause_resume_wrapper, inputs=None, outputs=run_tab_outputs ) - clear_button.click(fn=clear_wrapper, inputs=None, outputs=run_tab_outputs) + clear_button.click(fn=clear_wrapper, inputs=None, outputs=run_tab_outputs) \ No newline at end of file diff --git a/src/webui/webui_manager.py b/src/webui/webui_manager.py index 0a9d5e16..71a8c5ac 100644 --- a/src/webui/webui_manager.py +++ b/src/webui/webui_manager.py @@ -10,11 +10,8 @@ import time from gradio.components import Component -from browser_use.browser.browser import Browser -from browser_use.browser.context import BrowserContext +from browser_use.browser import BrowserSession from browser_use.agent.service import Agent -from src.browser.custom_browser import CustomBrowser -from src.browser.custom_context import CustomBrowserContext from src.controller.custom_controller import CustomController from src.agent.deep_research.deep_research_agent import DeepResearchAgent @@ -32,8 +29,7 @@ def init_browser_use_agent(self) -> None: init browser use agent """ self.bu_agent: Optional[Agent] = None - self.bu_browser: Optional[CustomBrowser] = None - self.bu_browser_context: Optional[CustomBrowserContext] = None + self.bu_browser_session: Optional[BrowserSession] = None self.bu_controller: Optional[CustomController] = None self.bu_chat_history: List[Dict[str, Optional[str]]] = [] self.bu_response_event: Optional[asyncio.Event] = None @@ -119,4 +115,4 @@ def load_config(self, config_path: str): config_status: config_status.__class__(value=f"Successfully loaded config: {config_path}") } ) - yield update_components + yield update_components \ No newline at end of file diff --git a/tests/test_agents.py b/tests/test_agents.py index a36561e4..a59686fe 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -1,5 +1,6 @@ import pdb +from browser_use.browser.profile import ViewportSize from dotenv import load_dotenv load_dotenv() @@ -18,13 +19,8 @@ async def test_browser_use_agent(): - from browser_use.browser.browser import Browser, BrowserConfig - from browser_use.browser.context import ( - BrowserContextConfig - ) from browser_use.agent.service import Agent - - from src.browser.custom_browser import CustomBrowser + from browser_use.browser import BrowserProfile, BrowserSession from src.controller.custom_controller import CustomController from src.utils import llm_provider from src.agent.browser_use.browser_use_agent import BrowserUseAgent @@ -98,46 +94,32 @@ async def test_browser_use_agent(): use_vision = True # Set to False when using DeepSeek max_actions_per_step = 10 - browser = None - browser_context = None + browser_session = None try: - extra_browser_args = [] + browser_user_data = None if use_own_browser: browser_binary_path = os.getenv("BROWSER_PATH", None) if browser_binary_path == "": browser_binary_path = None browser_user_data = os.getenv("BROWSER_USER_DATA", None) - if browser_user_data: - extra_browser_args += [f"--user-data-dir={browser_user_data}"] else: browser_binary_path = None - browser = CustomBrowser( - config=BrowserConfig( - headless=False, - browser_binary_path=browser_binary_path, - extra_browser_args=extra_browser_args, - new_context_config=BrowserContextConfig( - window_width=window_w, - window_height=window_h, - ) - ) - ) - browser_context = await browser.new_context( - config=BrowserContextConfig( - trace_path=None, - save_recording_path=None, - save_downloads_path="./tmp/downloads", - window_height=window_h, - window_width=window_w, - ) + browser_profile = BrowserProfile( + headless=False, + executable_path=browser_binary_path, + user_data_dir=browser_user_data, + window_size=ViewportSize(width=window_w,height=window_h), + traces_dir=None, + record_video_dir=None, + downloads_path="./tmp/downloads" ) + browser_session = BrowserSession(browser_profile=browser_profile) agent = BrowserUseAgent( # task="download pdf from https://arxiv.org/pdf/2311.16498 and rename this pdf to 'mcp-test.pdf'", task="give me nvidia stock price", llm=llm, - browser=browser, - browser_context=browser_context, + browser_session=browser_session, controller=controller, use_vision=use_vision, max_actions_per_step=max_actions_per_step, @@ -147,7 +129,6 @@ async def test_browser_use_agent(): print("Final Result:") pprint(history.final_result(), indent=4) - print("\nErrors:") pprint(history.errors(), indent=4) @@ -155,33 +136,27 @@ async def test_browser_use_agent(): import traceback traceback.print_exc() finally: - if browser_context: - await browser_context.close() - if browser: - await browser.close() + if browser_session: + await browser_session.kill() if controller: await controller.close_mcp_client() async def test_browser_use_parallel(): - from browser_use.browser.browser import Browser, BrowserConfig - from browser_use.browser.context import ( - BrowserContextConfig, - ) + from browser_use.browser import BrowserProfile, BrowserSession from browser_use.agent.service import Agent - from src.browser.custom_browser import CustomBrowser from src.controller.custom_controller import CustomController from src.utils import llm_provider from src.agent.browser_use.browser_use_agent import BrowserUseAgent - # llm = utils.get_llm_model( - # provider="openai", - # model_name="gpt-4o", - # temperature=0.8, - # base_url=os.getenv("OPENAI_ENDPOINT", ""), - # api_key=os.getenv("OPENAI_API_KEY", ""), - # ) + llm = utils.get_llm_model( + provider="openai", + model_name="gpt-4o", + temperature=0.8, + base_url=os.getenv("OPENAI_ENDPOINT", ""), + api_key=os.getenv("OPENAI_API_KEY", ""), + ) # llm = utils.get_llm_model( # provider="google", @@ -212,13 +187,13 @@ async def test_browser_use_parallel(): window_w, window_h = 1280, 1100 - llm = llm_provider.get_llm_model( - provider="azure_openai", - model_name="gpt-4o", - temperature=0.5, - base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""), - api_key=os.getenv("AZURE_OPENAI_API_KEY", ""), - ) + #llm = llm_provider.get_llm_model( + # provider="azure_openai", + # model_name="gpt-4o", + # temperature=0.5, + # base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""), + # api_key=os.getenv("AZURE_OPENAI_API_KEY", ""), + #) mcp_server_config = { "mcpServers": { @@ -254,43 +229,30 @@ async def test_browser_use_parallel(): use_vision = True # Set to False when using DeepSeek max_actions_per_step = 10 - browser = None - browser_context = None - + browser_session = None try: - extra_browser_args = [] + browser_user_data = None if use_own_browser: browser_binary_path = os.getenv("BROWSER_PATH", None) if browser_binary_path == "": browser_binary_path = None browser_user_data = os.getenv("BROWSER_USER_DATA", None) - if browser_user_data: - extra_browser_args += [f"--user-data-dir={browser_user_data}"] else: browser_binary_path = None - browser = CustomBrowser( - config=BrowserConfig( - headless=False, - browser_binary_path=browser_binary_path, - extra_browser_args=extra_browser_args, - new_context_config=BrowserContextConfig( - window_width=window_w, - window_height=window_h, - ) - ) - ) - browser_context = await browser.new_context( - config=BrowserContextConfig( - trace_path=None, - save_recording_path=None, - save_downloads_path="./tmp/downloads", - window_height=window_h, - window_width=window_w, - force_new_context=True - ) + + browser_profile = BrowserProfile( + headless=False, + executable_path=browser_binary_path, + user_data_dir=browser_user_data, + window_size=ViewportSize(width=window_w, height=window_h), + traces_dir=None, + record_video_dir=None, + downloads_path="./tmp/downloads" ) + browser_session = BrowserSession(browser_profile=browser_profile) + agents = [ - BrowserUseAgent(task=task, llm=llm, browser=browser, controller=controller) + BrowserUseAgent(task=task, llm=llm, browser_session=browser_session, controller=controller) for task in [ 'Search Google for weather in Tokyo', # 'Check Reddit front page title', @@ -306,11 +268,11 @@ async def test_browser_use_parallel(): history = await asyncio.gather(*[agent.run() for agent in agents]) print("Final Result:") - pprint(history.final_result(), indent=4) - + for agent_history in history: + pprint(agent_history.final_result(), indent=4) print("\nErrors:") - pprint(history.errors(), indent=4) - + for agent_history in history: + pprint(agent_history.errors(), indent=4) pdb.set_trace() except Exception: @@ -318,10 +280,8 @@ async def test_browser_use_parallel(): traceback.print_exc() finally: - if browser_context: - await browser_context.close() - if browser: - await browser.close() + if browser_session: + await browser_session.kill() if controller: await controller.close_mcp_client() @@ -397,4 +357,4 @@ async def test_deep_research_agent(): if __name__ == "__main__": asyncio.run(test_browser_use_agent()) # asyncio.run(test_browser_use_parallel()) - # asyncio.run(test_deep_research_agent()) + # asyncio.run(test_deep_research_agent()) \ No newline at end of file