Skip to content

Commit 0d85c04

Browse files
committed
Update BrowserUse step
1 parent 6a72f9b commit 0d85c04

File tree

3 files changed

+135
-108
lines changed

3 files changed

+135
-108
lines changed
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
import logging
2+
import os
3+
from typing import List, Optional
4+
5+
from browser_use import Browser, BrowserConfig, BrowserContextConfig, Controller
6+
from browser_use.agent.views import ActionResult
7+
from browser_use.browser.context import BrowserContext
8+
9+
logger = logging.getLogger(__name__)
10+
11+
12+
async def set_file_input(index: int, paths: str | List[str], browser: BrowserContext) -> ActionResult:
    """
    Set the value of a file input element to one or more local files.

    Args:
        index: The DOM element index to target
        paths: Local file path or list of local file paths to upload
        browser: Browser context for interaction

    Returns:
        ActionResult: On success ``extracted_content`` names the paths that
        were set. ``error`` is populated instead when a path does not exist,
        when the index cannot be resolved to a file input element, or when
        setting the files on the element fails.
    """
    # Accept a single path for convenience; everything below works on a list.
    if isinstance(paths, str):
        paths = [paths]

    # Validate every path up front so nothing is set if any file is missing.
    for path in paths:
        if not os.path.exists(path):
            return ActionResult(error=f"File {path} does not exist")

    # NOTE(review): assumes an unknown index surfaces either as a
    # KeyError/IndexError or as a None result - confirm against the installed
    # browser_use version. Either way it is reported as a failed action
    # instead of escaping as an unhandled exception.
    try:
        dom_el = await browser.get_dom_element_by_index(index)
    except (KeyError, IndexError):
        dom_el = None

    if dom_el is None:
        msg = f"No element found at index {index}"
        logger.info(msg)
        return ActionResult(error=msg)

    not_found_msg = f"No file upload element found at index {index}. The element may be hidden or not an input type file"

    # First resolve the file input within the DOM tree, then locate the live
    # page element for it; both steps report the same "not found" error.
    file_upload_dom_el = dom_el.get_file_upload_element()
    if file_upload_dom_el is None:
        logger.info(not_found_msg)
        return ActionResult(error=not_found_msg)

    file_upload_el = await browser.get_locate_element(file_upload_dom_el)
    if file_upload_el is None:
        logger.info(not_found_msg)
        return ActionResult(error=not_found_msg)

    # Only the upload call itself is guarded; its failure is returned to the
    # caller as an error result rather than raised.
    try:
        await file_upload_el.set_input_files(paths)
    except Exception as e:
        msg = f"Failed to upload file to index {index}: {str(e)}"
        logger.info(msg)
        return ActionResult(error=msg)

    msg = f"Successfully set file input value to {paths}"
    logger.info(msg)
    return ActionResult(extracted_content=msg, include_in_memory=True)
55+
56+
57+
class BrowserInitializer:
    """
    Initialize and cache browser, browser context and controller instances.

    Each ``init_*`` classmethod creates its object on the first call, stores
    it on the class, and returns that same object on every later call, so a
    single instance is shared by all callers in the process.

    NOTE(review): the cached objects are never cleared. If a caller closes the
    returned browser or context, later calls still return the closed object.
    """

    # Class-level caches shared by every caller; None until first initialized.
    _browser = None
    _controller = None
    _browser_context = None

    @classmethod
    def init_browser(cls, config: Optional[BrowserConfig] = None):
        """
        Initialize and cache the Browser instance.

        Args:
            config: Browser configuration, used only when the browser is first
                created and ignored once an instance is cached. When omitted,
                a fresh ``BrowserConfig()`` is used.

        Returns:
            Browser: Browser instance for web automation
        """
        if cls._browser is None:
            # Build the default here rather than in the signature: a default
            # argument is evaluated once at definition time and would be one
            # shared config object created as a side effect of importing.
            if config is None:
                config = BrowserConfig()
            cls._browser = Browser(config=config)
        return cls._browser

    @classmethod
    def init_browser_context(cls, config: Optional[BrowserConfig] = None):
        """
        Initialize and cache the BrowserContext instance.

        The context saves downloads to ``<cwd>/downloads`` (created if
        missing) and uses ``<cwd>/cookies.json`` as its cookies file.

        Args:
            config: Browser configuration forwarded to ``init_browser``; it
                only has an effect if no browser has been cached yet. The
                context's own configuration is built internally.

        Returns:
            BrowserContext: BrowserContext instance for managing browser context
        """
        if cls._browser_context is not None:
            return cls._browser_context

        working_dir = os.getcwd()
        downloads_path = os.path.join(working_dir, "downloads")
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(downloads_path, exist_ok=True)
        cookies_file = os.path.join(working_dir, "cookies.json")

        context_config = BrowserContextConfig(
            save_downloads_path=downloads_path,
            cookies_file=cookies_file,
            _force_keep_context_alive=True,
        )
        browser = cls.init_browser(config=config)
        cls._browser_context = BrowserContext(browser=browser, config=context_config)
        return cls._browser_context

    @classmethod
    def init_controller(cls):
        """
        Initialize and cache the Controller instance.

        Returns:
            Controller: Controller instance for managing browser actions
        """
        if cls._controller is None:
            cls._controller = Controller()
        return cls._controller

patchwork/steps/BrowserUse/BrowserUse.py

Lines changed: 11 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -10,111 +10,6 @@
1010

1111
logger = logging.getLogger(__name__)
1212

13-
# Global variables to cache browser initialization
14-
_browser = None
15-
_controller = None
16-
17-
18-
def init_browser():
19-
"""
20-
Initialize and cache browser and controller instances.
21-
22-
This function uses a singleton pattern to ensure we only create one browser
23-
instance throughout the application lifecycle, which saves resources.
24-
25-
Returns:
26-
tuple: (Browser, Controller) instances for web automation
27-
"""
28-
global _browser, _controller
29-
30-
# Return cached instances if already initialized
31-
if _browser is not None and _controller is not None:
32-
return _browser, _controller
33-
34-
from browser_use import Browser, BrowserConfig, BrowserContextConfig, Controller
35-
from browser_use.agent.views import ActionResult
36-
from browser_use.browser.context import BrowserContext
37-
38-
# Set up downloads directory for browser operations
39-
downloads_path = os.path.join(os.getcwd(), "downloads")
40-
if not os.path.exists(downloads_path):
41-
os.makedirs(downloads_path)
42-
43-
context_config = BrowserContextConfig(save_downloads_path=downloads_path)
44-
config = BrowserConfig(headless=True, disable_security=True, new_context_config=context_config)
45-
controller = Controller()
46-
47-
# Register custom action to upload files to web elements
48-
@controller.action(
49-
description="Upload file to interactive element with file path",
50-
)
51-
async def upload_file(index: int, path: str, browser: BrowserContext):
52-
"""
53-
Upload a file to a file input element identified by its index.
54-
55-
Args:
56-
index: The DOM element index to target
57-
path: Local file path to upload
58-
browser: Browser context for interaction
59-
60-
Returns:
61-
ActionResult: Result of the upload operation
62-
"""
63-
if not os.path.exists(path):
64-
return ActionResult(error=f"File {path} does not exist")
65-
66-
dom_el = await browser.get_dom_element_by_index(index)
67-
file_upload_dom_el = dom_el.get_file_upload_element()
68-
69-
if file_upload_dom_el is None:
70-
msg = f"No file upload element found at index {index}. The element may be hidden or not an input type file"
71-
logger.info(msg)
72-
return ActionResult(error=msg)
73-
74-
file_upload_el = await browser.get_locate_element(file_upload_dom_el)
75-
76-
if file_upload_el is None:
77-
msg = f"No file upload element found at index {index}. The element may be hidden or not an input type file"
78-
logger.info(msg)
79-
return ActionResult(error=msg)
80-
81-
try:
82-
await file_upload_el.set_input_files(path)
83-
msg = f"Successfully uploaded file to index {index}"
84-
logger.info(msg)
85-
return ActionResult(extracted_content=msg, include_in_memory=True)
86-
except Exception as e:
87-
msg = f"Failed to upload file to index {index}: {str(e)}"
88-
logger.info(msg)
89-
return ActionResult(error=msg)
90-
91-
# Register custom action to read file contents
92-
@controller.action(description="Read the file content of a file given a path")
93-
async def read_file(path: str):
94-
"""
95-
Read and return the contents of a file at the specified path.
96-
97-
Args:
98-
path: Path to the file to read
99-
100-
Returns:
101-
ActionResult: File contents or error message
102-
"""
103-
if not os.path.exists(path):
104-
return ActionResult(error=f"File {path} does not exist")
105-
106-
with open(path, "r") as f:
107-
content = f.read()
108-
msg = f"File content: {content}"
109-
logger.info(msg)
110-
return ActionResult(extracted_content=msg, include_in_memory=True)
111-
112-
# Cache the initialized instances
113-
_browser = Browser(config=config)
114-
_controller = controller
115-
116-
return _browser, _controller
117-
11813

11914
class BrowserUse(Step, input_class=BrowserUseInputs, output_class=BrowserUseOutputs):
12015
"""
@@ -174,20 +69,29 @@ def run(self) -> dict:
17469
dict: Results of the browser automation task
17570
"""
17671
from browser_use import Agent
72+
from browser_use import BrowserConfig
73+
from patchwork.common.utils.browser_initializer import BrowserInitializer
17774

178-
browser, controller = init_browser()
75+
browser_config = BrowserConfig(headless=self.inputs.get("headless", True), disable_security=True)
76+
browser_context = BrowserInitializer.init_browser_context(browser_config)
77+
controller = BrowserInitializer.init_controller()
78+
logger.info("Browser initialized")
17979
agent = Agent(
180-
browser=browser,
80+
browser_context=browser_context,
18181
controller=controller,
18282
task=mustache_render(self.inputs["task"], self.inputs["task_value"]),
18383
llm=self.llm,
18484
generate_gif=self.generate_gif,
18585
validate_output=True,
86+
initial_actions=self.inputs.get("initial_actions", None),
18687
)
18788

18889
# Run the agent in an event loop
18990
loop = asyncio.new_event_loop()
19091
self.history = loop.run_until_complete(agent.run())
92+
loop.run_until_complete(browser_context.close())
93+
loop.run_until_complete(browser_context.browser.close())
94+
loop.close()
19195

19296
# Format results as JSON if schema provided
19397
if "example_json" in self.inputs:

patchwork/steps/BrowserUse/typed.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing_extensions import Annotated, Any, Dict, Optional, TypedDict
1+
from typing_extensions import Annotated, Any, Dict, Optional, TypedDict, List
22

33
from patchwork.common.utils.step_typing import StepTypeConfig
44

@@ -14,6 +14,8 @@ class BrowserUseInputs(__BrowserUseInputsRequired, total=False):
1414
anthropic_api_key: Annotated[str, StepTypeConfig(or_op=["google_api_key", "openai_api_key"])]
1515
google_api_key: Annotated[str, StepTypeConfig(or_op=["openai_api_key", "anthropic_api_key"])]
1616
generate_gif: Optional[bool]
17+
headless: Optional[bool]
18+
initial_actions: Optional[List[Dict[str, Dict[str, Any]]]]
1719

1820

1921
class BrowserUseOutputs(TypedDict):

0 commit comments

Comments
 (0)