diff --git a/.github/workflows/integration-runner.yml b/.github/workflows/integration-runner.yml index fb5a0a7ea4..a7cff194f6 100644 --- a/.github/workflows/integration-runner.yml +++ b/.github/workflows/integration-runner.yml @@ -244,6 +244,11 @@ jobs: version: latest python-version: ${{ matrix.python-version }} + - name: Install Chromium + run: | + sudo apt-get update + sudo apt-get install -y chromium-browser + - name: Install Python dependencies using uv run: | uv sync --dev diff --git a/openhands-tools/openhands/tools/browser_use/impl.py b/openhands-tools/openhands/tools/browser_use/impl.py index 4099623f43..ed68ceecc2 100644 --- a/openhands-tools/openhands/tools/browser_use/impl.py +++ b/openhands-tools/openhands/tools/browser_use/impl.py @@ -585,6 +585,13 @@ def close(self): finally: # Always close the async executor self._async_executor.close() + # Release the shared executor reference so the class variable + # doesn't keep a stale reference that could prevent process exit. + from openhands.tools.browser_use.definition import BrowserToolSet + + with BrowserToolSet._shared_executor_lock: + if BrowserToolSet._shared_executor is self: + BrowserToolSet._shared_executor = None def __del__(self): """Cleanup on deletion.""" diff --git a/tests/integration/base.py b/tests/integration/base.py index 430260c3f2..8fde0fe85b 100644 --- a/tests/integration/base.py +++ b/tests/integration/base.py @@ -247,10 +247,27 @@ def instruction_message(self) -> Message: return Message(role="user", content=[TextContent(text=self.instruction)]) @property - @abstractmethod + def enable_browser(self) -> bool: + """Whether to enable browser tools. Override in subclasses that need browsing. + + Returns: + False by default. Override to True for tests that require browser access. + """ + return False + + @property def tools(self) -> list[Tool]: - """List of tools available to the agent.""" - pass + """List of tools available to the agent. + + By default, uses the configured tool preset with browser support controlled + by the ``enable_browser`` property. This ensures integration tests validate + the same agent configuration shipped to production (GUI/CLI). + + Override this property in subclasses that need custom tool configurations. + """ + return get_tools_for_preset( + self.tool_preset, enable_browser=self.enable_browser + ) @property def condenser(self) -> CondenserBase | None: @@ -373,3 +390,8 @@ def teardown(self): The workspace directory is torn down externally. Add any additional cleanup (git, server, ...) here if needed. """ + # Close the conversation to ensure all tool executors (including the + # browser / Chrome process) are shut down. Without this, worker + # processes in ProcessPoolExecutor hang indefinitely because the + # browser's background threads keep them alive. + self.conversation.close() diff --git a/tests/integration/run_infer.py b/tests/integration/run_infer.py index 6490791d11..753d04fd05 100755 --- a/tests/integration/run_infer.py +++ b/tests/integration/run_infer.py @@ -297,8 +297,12 @@ def run_evaluation( result = process_instance(instance, llm_config, tool_preset) results.append(result) else: - # Parallel execution - with ProcessPoolExecutor(max_workers=num_workers) as executor: + # Parallel execution – avoid ProcessPoolExecutor context manager + # because worker processes that spawn browser/Chrome subprocesses + # may not exit cleanly, causing shutdown(wait=True) to hang + # indefinitely. + executor = ProcessPoolExecutor(max_workers=num_workers) + try: future_to_instance = { executor.submit( process_instance, instance, llm_config, tool_preset @@ -309,6 +313,8 @@ def run_evaluation( for future in as_completed(future_to_instance): result = future.result() results.append(result) + finally: + executor.shutdown(wait=False, cancel_futures=True) return results diff --git a/tests/integration/tests/b05_do_not_create_redundant_files.py b/tests/integration/tests/b05_do_not_create_redundant_files.py index 95661c7954..b593b14e1d 100644 --- a/tests/integration/tests/b05_do_not_create_redundant_files.py +++ b/tests/integration/tests/b05_do_not_create_redundant_files.py @@ -7,13 +7,7 @@ from textwrap import dedent from openhands.sdk import get_logger -from openhands.sdk.tool import Tool -from tests.integration.base import ( - BaseIntegrationTest, - SkipTest, - TestResult, - get_tools_for_preset, -) +from tests.integration.base import BaseIntegrationTest, SkipTest, TestResult from tests.integration.behavior_utils import ( get_conversation_summary, ) @@ -35,10 +29,6 @@ class NoRedundantFilesTest(BaseIntegrationTest): INSTRUCTION: str = INSTRUCTION - @property - def tools(self) -> list[Tool]: - return get_tools_for_preset(self.tool_preset, enable_browser=False) - def setup(self) -> None: # noqa: D401 """Set up a realistic codebase by cloning the lerobot repo.""" try: diff --git a/tests/integration/tests/t01_fix_simple_typo.py b/tests/integration/tests/t01_fix_simple_typo.py index a5a54c0d76..5fa4725306 100644 --- a/tests/integration/tests/t01_fix_simple_typo.py +++ b/tests/integration/tests/t01_fix_simple_typo.py @@ -3,8 +3,7 @@ import os from openhands.sdk import get_logger -from openhands.sdk.tool import Tool -from tests.integration.base import BaseIntegrationTest, TestResult, get_tools_for_preset +from tests.integration.base import BaseIntegrationTest, TestResult INSTRUCTION = ( @@ -32,11 +31,6 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.document_path: str = os.path.join(self.workspace, "document.txt") - @property - def tools(self) -> list[Tool]: - """List of tools available to the agent based on configured tool preset.""" - return get_tools_for_preset(self.tool_preset, enable_browser=False) - def setup(self) -> None: """Create a text file with typos for the agent to fix.""" # Create the test file with typos diff --git a/tests/integration/tests/t02_add_bash_hello.py b/tests/integration/tests/t02_add_bash_hello.py index e608b86d39..1f0cea7615 100644 --- a/tests/integration/tests/t02_add_bash_hello.py +++ b/tests/integration/tests/t02_add_bash_hello.py @@ -3,8 +3,7 @@ import os from openhands.sdk import get_logger -from openhands.sdk.tool import Tool -from tests.integration.base import BaseIntegrationTest, TestResult, get_tools_for_preset +from tests.integration.base import BaseIntegrationTest, TestResult INSTRUCTION = "Write a shell script 'shell/hello.sh' that prints 'hello'." @@ -22,11 +21,6 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.script_path: str = os.path.join(self.workspace, "shell", "hello.sh") - @property - def tools(self) -> list[Tool]: - """List of tools available to the agent based on configured tool preset.""" - return get_tools_for_preset(self.tool_preset, enable_browser=False) - def setup(self) -> None: """Setup is not needed - agent will create directories as needed.""" diff --git a/tests/integration/tests/t03_jupyter_write_file.py b/tests/integration/tests/t03_jupyter_write_file.py index 11f860e0ee..69df54c839 100644 --- a/tests/integration/tests/t03_jupyter_write_file.py +++ b/tests/integration/tests/t03_jupyter_write_file.py @@ -3,8 +3,7 @@ import os from openhands.sdk import get_logger -from openhands.sdk.tool import Tool -from tests.integration.base import BaseIntegrationTest, TestResult, get_tools_for_preset +from tests.integration.base import BaseIntegrationTest, TestResult INSTRUCTION = ( @@ -25,11 +24,6 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.file_path: str = os.path.join(self.workspace, "test.txt") - @property - def tools(self) -> list[Tool]: - """List of tools available to the agent based on configured tool preset.""" - return get_tools_for_preset(self.tool_preset, enable_browser=False) - def setup(self) -> None: """Setup is not needed - agent will create directories as needed.""" diff --git a/tests/integration/tests/t04_git_staging.py b/tests/integration/tests/t04_git_staging.py index 4a1b633947..d5e8c41e42 100644 --- a/tests/integration/tests/t04_git_staging.py +++ b/tests/integration/tests/t04_git_staging.py @@ -4,8 +4,7 @@ import subprocess from openhands.sdk import get_logger -from openhands.sdk.tool import Tool -from tests.integration.base import BaseIntegrationTest, TestResult, get_tools_for_preset +from tests.integration.base import BaseIntegrationTest, TestResult INSTRUCTION = ( @@ -21,11 +20,6 @@ class GitStagingTest(BaseIntegrationTest): INSTRUCTION: str = INSTRUCTION - @property - def tools(self) -> list[Tool]: - """List of tools available to the agent based on configured tool preset.""" - return get_tools_for_preset(self.tool_preset, enable_browser=False) - def setup(self) -> None: """Set up git repository with staged changes.""" # Initialize git repository diff --git a/tests/integration/tests/t05_simple_browsing.py b/tests/integration/tests/t05_simple_browsing.py index 63940253ad..fd8dca2a05 100644 --- a/tests/integration/tests/t05_simple_browsing.py +++ b/tests/integration/tests/t05_simple_browsing.py @@ -7,8 +7,7 @@ from openhands.sdk import get_logger from openhands.sdk.conversation import get_agent_final_response -from openhands.sdk.tool import Tool -from tests.integration.base import BaseIntegrationTest, TestResult, get_tools_for_preset +from tests.integration.base import BaseIntegrationTest, TestResult INSTRUCTION = "Browse localhost:8000, and tell me the ultimate answer to life." @@ -99,9 +98,9 @@ def __init__(self, *args, **kwargs): self.server_process: subprocess.Popen[bytes] | None = None @property - def tools(self) -> list[Tool]: - """List of tools available to the agent based on configured tool preset.""" - return get_tools_for_preset(self.tool_preset, enable_browser=False) + def enable_browser(self) -> bool: + """Enable browser tools for this browsing test.""" + return True def setup(self) -> None: """Set up a local web server with the HTML file.""" diff --git a/tests/integration/tests/t06_github_pr_browsing.py b/tests/integration/tests/t06_github_pr_browsing.py index 9e2fe20a4e..973d20d8b3 100644 --- a/tests/integration/tests/t06_github_pr_browsing.py +++ b/tests/integration/tests/t06_github_pr_browsing.py @@ -2,8 +2,7 @@ from openhands.sdk import get_logger from openhands.sdk.conversation import get_agent_final_response -from openhands.sdk.tool import Tool -from tests.integration.base import BaseIntegrationTest, TestResult, get_tools_for_preset +from tests.integration.base import BaseIntegrationTest, TestResult INSTRUCTION = ( @@ -21,9 +20,9 @@ class GitHubPRBrowsingTest(BaseIntegrationTest): INSTRUCTION: str = INSTRUCTION @property - def tools(self) -> list[Tool]: - """List of tools available to the agent based on configured tool preset.""" - return get_tools_for_preset(self.tool_preset, enable_browser=False) + def enable_browser(self) -> bool: + """Enable browser tools for this browsing test.""" + return True def setup(self) -> None: """No special setup needed for GitHub PR browsing.""" diff --git a/tests/integration/tests/t07_interactive_commands.py b/tests/integration/tests/t07_interactive_commands.py index b075c8ad70..26617e1b55 100644 --- a/tests/integration/tests/t07_interactive_commands.py +++ b/tests/integration/tests/t07_interactive_commands.py @@ -4,8 +4,7 @@ import os from openhands.sdk import get_logger -from openhands.sdk.tool import Tool -from tests.integration.base import BaseIntegrationTest, TestResult, get_tools_for_preset +from tests.integration.base import BaseIntegrationTest, TestResult INSTRUCTION = ( @@ -38,11 +37,6 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.script_path: str = os.path.join(self.workspace, "python_script.py") - @property - def tools(self) -> list[Tool]: - """List of tools available to the agent based on configured tool preset.""" - return get_tools_for_preset(self.tool_preset, enable_browser=False) - def setup(self) -> None: """Set up the interactive Python script.""" diff --git a/tests/integration/tests/t08_image_file_viewing.py b/tests/integration/tests/t08_image_file_viewing.py index 2fa73dc897..cddb294764 100644 --- a/tests/integration/tests/t08_image_file_viewing.py +++ b/tests/integration/tests/t08_image_file_viewing.py @@ -5,13 +5,7 @@ from openhands.sdk import get_logger from openhands.sdk.conversation.response_utils import get_agent_final_response -from openhands.sdk.tool import Tool -from tests.integration.base import ( - BaseIntegrationTest, - SkipTest, - TestResult, - get_tools_for_preset, -) +from tests.integration.base import BaseIntegrationTest, SkipTest, TestResult INSTRUCTION = ( @@ -41,11 +35,6 @@ def __init__(self, *args, **kwargs): "Please use a model that supports image input." ) - @property - def tools(self) -> list[Tool]: - """List of tools available to the agent based on configured tool preset.""" - return get_tools_for_preset(self.tool_preset, enable_browser=False) - def setup(self) -> None: """Download the OpenHands logo for the agent to analyze.""" try: diff --git a/tests/integration/utils/behavior_helpers.py b/tests/integration/utils/behavior_helpers.py index 826024b619..53564b75c5 100644 --- a/tests/integration/utils/behavior_helpers.py +++ b/tests/integration/utils/behavior_helpers.py @@ -87,7 +87,7 @@ def clone_pinned_software_agent_repo(workspace: str) -> Path: def default_behavior_tools(tool_preset: ToolPresetType = "default") -> list[Tool]: - """Register and return tools for behavior tests based on the tool preset.""" + """Return the default tools for behavior tests based on the tool preset.""" return get_tools_for_preset(tool_preset, enable_browser=False)