101 changes: 101 additions & 0 deletions workflows/examples/test_iframes.json
@@ -0,0 +1,101 @@
{
"name": "Recorded Workflow",
"description": "Recorded on 8/11/2025, 4:20:47 AM",
"version": "1.0.0",
"input_schema": [],
"steps": [
{
"type": "navigation",
"timestamp": 1754866228439,
"tabId": 388342781,
"url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe"
},
{
"type": "scroll",
"timestamp": 1754866228608,
"tabId": 388342781,
"targetId": 219,
"scrollX": 0,
"scrollY": 7,
"url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe"
},
{
"type": "navigation",
"timestamp": 1754866228634,
"tabId": 388342781,
"url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe"
},
{
"type": "click",
"timestamp": 1754866228849,
"tabId": 388342781,
"url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe",
"frameUrl": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe",
"frameIdPath": "0",
"xpath": "id(\"textareawrapper\")/div[1]/div[6]",
"cssSelector": "div.CodeMirror-scroll",
"elementTag": "DIV",
"elementText": "<!DOCTYPE html><html><body>​<h1>The iframe element</h1>​<iframe src=\"https://www.w3schools.com\" title=\"W3Schools Free Online Web Tutorials\"></iframe>​</body></html>​"
},
{
"type": "navigation",
"timestamp": 1754866230495,
"tabId": 388342781,
"url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe"
},
{
"type": "click",
"timestamp": 1754866231531,
"tabId": 388342781,
"url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe",
"frameUrl": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe",
"frameIdPath": "0",
"xpath": "body/div[2]/div[1]/a[4]",
"cssSelector": "a.w3-button.w3-bar-item.w3-hide-small.topnav-icons.fa.fa-rotate.ga-tryit[href=\"javascript:void(0);\"][title*=\"Change Orientation\"]",
"elementTag": "A",
"elementText": ""
},
{
"type": "navigation",
"timestamp": 1754866237707,
"tabId": 388342781,
"url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe"
},
{
"type": "click",
"timestamp": 1754866238574,
"tabId": 388342781,
"url": "https://www.w3schools.com/",
"frameUrl": "https://www.w3schools.com/",
"frameIdPath": "0.0",
"xpath": "id(\"subtopnav\")/a[3]",
"cssSelector": "a.ga-nav[href=\"/js/default.asp\"][title*=\"JavaScript Tutorial\"]",
"elementTag": "A",
"elementText": "JAVASCRIPT"
},
{
"type": "navigation",
"timestamp": 1754866242778,
"tabId": 388342781,
"url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe"
},
{
"type": "click",
"timestamp": 1754866242882,
"tabId": 388342781,
"url": "https://www.w3schools.com/js/default.asp",
"frameUrl": "https://www.w3schools.com/js/default.asp",
"frameIdPath": "0.0",
"xpath": "id(\"subtopnav\")/a[1]",
"cssSelector": "a.ga-nav.subtopnav_firstitem[href=\"/html/default.asp\"][title*=\"HTML Tutorial\"]",
"elementTag": "A",
"elementText": "HTML"
},
{
"type": "navigation",
"timestamp": 1754866246395,
"tabId": 388342781,
"url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe"
}
]
}
131 changes: 108 additions & 23 deletions workflows/workflow_use/controller/service.py
@@ -3,8 +3,9 @@

from browser_use import Browser
from browser_use.agent.views import ActionResult
from browser_use.controller import Controller
from browser_use.llm.base import BaseChatModel
from browser_use.controller.service import Controller
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.prompts import PromptTemplate

from workflow_use.controller.utils import get_best_element_handle, truncate_selector
from workflow_use.controller.views import (
@@ -19,7 +20,7 @@

logger = logging.getLogger(__name__)

DEFAULT_ACTION_TIMEOUT_MS = 1000
DEFAULT_ACTION_TIMEOUT_MS = 2500

# List of default actions from browser_use.controller.service.Controller to disable
# todo: come up with a better way to filter out the actions (filter IN the actions would be much nicer in this case)
@@ -65,9 +66,7 @@ async def navigation(params: NavigationAction, browser_session: Browser) -> ActionResult:
"""Navigate to the given URL."""
page = await browser_session.get_current_page()
await page.goto(params.url)
# Wait for page to load (CDP navigate doesn't wait automatically)
import asyncio
await asyncio.sleep(2)
await page.wait_for_load_state()

msg = f'🔗 Navigated to URL: {params.url}'
logger.info(msg)
@@ -84,16 +83,101 @@ async def click(params: ClickElementDeterministicAction, browser_session: Browser) -> ActionResult:
page = await browser_session.get_current_page()
original_selector = params.cssSelector

# If frameUrl or frameIdPath are provided, narrow the search to that frame
def _select_context(pg):
try:
from playwright.async_api import Page, Frame
Collaborator:
We have removed the playwright dependency in the Browser Use library, so we would love to not use playwright here if possible. Can you do it by using Browser Use actions?

ctx: Page | Frame = pg
# If frame hints point to top document, stay on page
fid = getattr(params, 'frameIdPath', None)
furl = getattr(params, 'frameUrl', None)
curr_url = (pg.url or '').split('#')[0] if hasattr(pg, 'url') else ''
if furl and curr_url and furl.split('#')[0] == curr_url:
return pg
if fid:
segs = [s for s in str(fid).split('.') if s != '']
if all(s == '0' for s in segs):
return pg
f = pg.main_frame
for s in segs[1:]: # skip top marker
idx = int(s)
if 0 <= idx < len(f.child_frames):
f = f.child_frames[idx]
else:
return pg
return f
if furl:
from urllib.parse import urlparse
pf = urlparse(furl)
# If frameUrl equals current page URL (origin+path), stay on page
try:
from urllib.parse import urlparse as _u
cu = _u(curr_url)
if (cu.scheme, cu.netloc, cu.path) == (pf.scheme, pf.netloc, pf.path):
return pg
except Exception:
pass
for fr in pg.frames:
try:
ff = urlparse(fr.url)
if (ff.scheme, ff.netloc) == (pf.scheme, pf.netloc) and fr.url.startswith(furl):
return fr
except Exception:
continue
except Exception:
return pg
return ctx

# Fallback: search all frames for selector (prefer frames matching target origin)
async def _find_in_frames(pg, selector: str):
from urllib.parse import urlparse
prefer = getattr(params, 'frameUrl', None) or getattr(params, 'url', None) or ''
pref_o = urlparse(prefer) if prefer else None
frames = list(pg.frames)
def score(fr):
if not pref_o:
return 0
try:
fo = urlparse(fr.url)
return 2 if (fo.scheme, fo.netloc) == (pref_o.scheme, pref_o.netloc) else 0
except Exception:
return 0
frames.sort(key=score, reverse=True)
for fr in frames:
try:
loc, used = await get_best_element_handle(fr, selector, params, timeout_ms=max(800, DEFAULT_ACTION_TIMEOUT_MS // 2))
return fr, loc, used
except Exception:
continue
return None, None, None

try:
locator, selector_used = await get_best_element_handle(
page,
params.cssSelector,
params,
timeout_ms=DEFAULT_ACTION_TIMEOUT_MS,
)
# Only auto-navigate for top-document clicks (no frame hints) when a different URL is declared
curr = (page.url or '').split('#')[0]
declared_url = (getattr(params, 'url', None) or '').split('#')[0]
has_frame_hints = bool(getattr(params, 'frameIdPath', None) or getattr(params, 'frameUrl', None))
if declared_url and declared_url.startswith('http') and not has_frame_hints and curr != declared_url:
await page.goto(declared_url)
await page.wait_for_load_state()

ctx = _select_context(page)
try:
locator, selector_used = await get_best_element_handle(
ctx,
params.cssSelector,
params,
timeout_ms=DEFAULT_ACTION_TIMEOUT_MS,
)
except Exception:
# Fallback: search all frames
fr, locator, selector_used = await _find_in_frames(page, params.cssSelector)
if locator is None:
raise

await locator.click(force=True)

msg = f'🖱️ Clicked element with CSS selector: {truncate_selector(selector_used)} (original: {truncate_selector(original_selector)})'
used_str = selector_used if isinstance(selector_used, str) and selector_used else params.cssSelector
msg = f'🖱️ Clicked element with CSS selector: {truncate_selector(used_str)} (original: {truncate_selector(original_selector)})'
logger.info(msg)
return ActionResult(extracted_content=msg, include_in_memory=True)
except Exception as e:
@@ -201,7 +285,7 @@ async def key_press(params: KeyPressDeterministicAction, browser_session: Browser) -> ActionResult:
async def scroll(params: ScrollDeterministicAction, browser_session: Browser) -> ActionResult:
"""Scroll the page by the given x/y pixel offsets."""
page = await browser_session.get_current_page()
await page.evaluate(f'() => window.scrollBy({params.scrollX}, {params.scrollY})')
await page.evaluate(f'window.scrollBy({params.scrollX}, {params.scrollY});')
msg = f'📜 Scrolled page by (x={params.scrollX}, y={params.scrollY})'
logger.info(msg)
return ActionResult(extracted_content=msg, include_in_memory=True)
@@ -220,18 +304,19 @@ async def extract_page_content(

strip = ['a', 'img']

# Get page HTML content using CDP evaluate
html_content = await page.evaluate('() => document.documentElement.outerHTML')
content = markdownify.markdownify(html_content, strip=strip)
content = markdownify.markdownify(await page.content(), strip=strip)

# Note: iframe content extraction is not yet supported in CDP-based implementation
# TODO: Implement iframe content extraction using CDP
# manually append iframe text into the content so it's readable by the LLM (includes cross-origin iframes)
for iframe in page.frames:
if iframe.url != page.url and not iframe.url.startswith('data:'):
content += f'\n\nIFRAME {iframe.url}:\n'
content += markdownify.markdownify(await iframe.content())

prompt = f'Your task is to extract the content of the page. You will be given a page and a goal and you should extract all relevant information around this goal from the page. If the goal is vague, summarize the page. Respond in json format. Extraction goal: {params.goal}, Page: {content}'
prompt = 'Your task is to extract the content of the page. You will be given a page and a goal and you should extract all relevant information around this goal from the page. If the goal is vague, summarize the page. Respond in json format. Extraction goal: {goal}, Page: {page}'
template = PromptTemplate(input_variables=['goal', 'page'], template=prompt)
try:
from browser_use.llm import UserMessage
output = await page_extraction_llm.ainvoke([UserMessage(content=prompt)])
msg = f'📄 Extracted from page\n: {output.completion}\n'
output = await page_extraction_llm.ainvoke(template.format(goal=params.goal, page=content))
msg = f'📄 Extracted from page\n: {output.content}\n'
logger.info(msg)
return ActionResult(extracted_content=msg, include_in_memory=True)
except Exception as e:
7 changes: 4 additions & 3 deletions workflows/workflow_use/controller/views.py
@@ -13,9 +13,8 @@ class Config:

# Mixin for shared step metadata (timestamp and tab context)
class StepMeta(_BaseExtra):
# timestamp: int
# tabId: int
pass
timestamp: int
tabId: int


# Common optional fields present in recorder events
@@ -24,6 +23,8 @@ class RecorderBase(StepMeta):
elementTag: Optional[str] = None
elementText: Optional[str] = None
frameUrl: Optional[str] = None
frameIdPath: Optional[str] = None
url: Optional[str] = None
screenshot: Optional[str] = None


37 changes: 32 additions & 5 deletions workflows/workflow_use/recorder/service.py
@@ -7,6 +7,7 @@
from browser_use import Browser
from browser_use.browser.profile import BrowserProfile
from fastapi import FastAPI
from patchright.async_api import async_playwright as patchright_async_playwright

# Assuming views.py is correctly located for this import path
from workflow_use.recorder.views import (
@@ -84,7 +85,32 @@ async def _capture_and_signal_final_workflow(self, trigger_reason: str):
async with self.final_workflow_processed_lock:
if not self.final_workflow_processed_flag and self.last_workflow_update_event:
print(f'[Service] Capturing final workflow (Trigger: {trigger_reason}).')
self.final_workflow_output = self.last_workflow_update_event.payload
wf = self.last_workflow_update_event.payload
# Backend safety filter: drop about:blank and obvious ad/analytics iframe navigations
try:
clean_steps = []
for s in wf.steps:
st = getattr(s, 'type', None) or (s.get('type') if isinstance(s, dict) else None)
url = getattr(s, 'url', None) or (s.get('url') if isinstance(s, dict) else None)
if st == 'navigation':
if not url or url == 'about:blank':
continue
from urllib.parse import urlparse
host = urlparse(url).hostname or ''
blocked = any(
pat in host for pat in (
'doubleclick.net', 'googlesyndication.com', 'googleadservices.com',
'amazon-adsystem.com', '2mdn.net', 'recaptcha.google.com', 'recaptcha.net',
'googletagmanager.com', 'indexww.com', 'adtrafficquality.google'
)
)
Comment on lines +104 to +110

Copilot AI (Oct 25, 2025):
The substring matching approach (e.g., 'doubleclick.net' in host) could incorrectly match legitimate domains like mydoubleclick.net.example.com. Consider using exact domain matching or suffix matching instead.

Collaborator:
Can we make this suffix matching?

if blocked:
continue
clean_steps.append(s)
wf.steps = clean_steps
except Exception as e:
print(f'[Service] Backend filter failed: {e}')
self.final_workflow_output = wf
self.final_workflow_processed_flag = True
processed_this_call = True

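Following up on the suffix-matching suggestion in the review thread above, a minimal sketch of that check (hypothetical helper name is_blocked_host; same blocklist and urlparse-based hostname as in this hunk) could look like:

from urllib.parse import urlparse

BLOCKED_SUFFIXES = (
    'doubleclick.net', 'googlesyndication.com', 'googleadservices.com',
    'amazon-adsystem.com', '2mdn.net', 'recaptcha.google.com', 'recaptcha.net',
    'googletagmanager.com', 'indexww.com', 'adtrafficquality.google',
)

def is_blocked_host(url: str) -> bool:
    """Return True if the URL's hostname is a blocked domain or one of its subdomains."""
    host = (urlparse(url).hostname or '').lower()
    # Suffix match anchored on a dot boundary: 'ads.doubleclick.net' is blocked,
    # but 'mydoubleclick.net.example.com' is not.
    return any(host == s or host.endswith('.' + s) for s in BLOCKED_SUFFIXES)

Anchoring the endswith check with a leading dot keeps the match at the end of the hostname, which addresses the false-positive case Copilot raised.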
@@ -96,7 +122,7 @@ async def _capture_and_signal_final_workflow(self, trigger_reason: str):
if trigger_reason == 'RecordingStoppedEvent' and self.browser:
print('[Service] Attempting to close browser due to RecordingStoppedEvent...')
try:
await self.browser.stop()
await self.browser.close()
print('[Service] Browser close command issued.')
except Exception as e_close:
print(f'[Service] Error closing browser on recording stop: {e_close}')
@@ -127,7 +153,8 @@ async def _launch_browser_and_wait(self):
)

# Create and configure browser
self.browser = Browser(browser_profile=profile)
playwright = await patchright_async_playwright().start()
self.browser = Browser(browser_profile=profile, playwright=playwright)

print('[Service] Starting browser with extensions...')
await self.browser.start()
@@ -150,7 +177,7 @@ async def _launch_browser_and_wait(self):
print('[Service] Browser task cancelled.')
if self.browser:
try:
await self.browser.stop()
await self.browser.close()
except:
pass # Best effort
raise # Re-raise to be caught by gather
@@ -218,7 +245,7 @@ async def capture_workflow(self) -> Optional[WorkflowDefinitionSchema]:
print('[Service] Ensuring browser is closed in cleanup...')
try:
self.browser.browser_profile.keep_alive = False
await self.browser.stop()
await self.browser.close()
except Exception as e_browser_close:
print(f'[Service] Error closing browser in final cleanup: {e_browser_close}')
# self.browser = None