-
Notifications
You must be signed in to change notification settings - Fork 297
feat(workflow): strengthen iframe-aware execution and extraction #103
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
ab941a2
c9d6dec
cd9ebc7
cb846ef
1856775
2ccea94
3ec843f
0e46a9f
015fb5c
5bfd65b
207b646
9f3109b
7c4a0cc
1c377b3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,101 @@ | ||
| { | ||
| "name": "Recorded Workflow", | ||
| "description": "Recorded on 8/11/2025, 4:20:47 AM", | ||
| "version": "1.0.0", | ||
| "input_schema": [], | ||
| "steps": [ | ||
| { | ||
| "type": "navigation", | ||
| "timestamp": 1754866228439, | ||
| "tabId": 388342781, | ||
| "url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe" | ||
| }, | ||
| { | ||
| "type": "scroll", | ||
| "timestamp": 1754866228608, | ||
| "tabId": 388342781, | ||
| "targetId": 219, | ||
| "scrollX": 0, | ||
| "scrollY": 7, | ||
| "url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe" | ||
| }, | ||
| { | ||
| "type": "navigation", | ||
| "timestamp": 1754866228634, | ||
| "tabId": 388342781, | ||
| "url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe" | ||
| }, | ||
| { | ||
| "type": "click", | ||
| "timestamp": 1754866228849, | ||
| "tabId": 388342781, | ||
| "url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe", | ||
| "frameUrl": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe", | ||
| "frameIdPath": "0", | ||
| "xpath": "id(\"textareawrapper\")/div[1]/div[6]", | ||
| "cssSelector": "div.CodeMirror-scroll", | ||
| "elementTag": "DIV", | ||
| "elementText": "<!DOCTYPE html><html><body><h1>The iframe element</h1><iframe src=\"https://www.w3schools.com\" title=\"W3Schools Free Online Web Tutorials\"></iframe></body></html>" | ||
| }, | ||
| { | ||
| "type": "navigation", | ||
| "timestamp": 1754866230495, | ||
| "tabId": 388342781, | ||
| "url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe" | ||
| }, | ||
| { | ||
| "type": "click", | ||
| "timestamp": 1754866231531, | ||
| "tabId": 388342781, | ||
| "url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe", | ||
| "frameUrl": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe", | ||
| "frameIdPath": "0", | ||
| "xpath": "body/div[2]/div[1]/a[4]", | ||
| "cssSelector": "a.w3-button.w3-bar-item.w3-hide-small.topnav-icons.fa.fa-rotate.ga-tryit[href=\"javascript:void(0);\"][title*=\"Change Orientation\"]", | ||
| "elementTag": "A", | ||
| "elementText": "" | ||
| }, | ||
| { | ||
| "type": "navigation", | ||
| "timestamp": 1754866237707, | ||
| "tabId": 388342781, | ||
| "url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe" | ||
| }, | ||
| { | ||
| "type": "click", | ||
| "timestamp": 1754866238574, | ||
| "tabId": 388342781, | ||
| "url": "https://www.w3schools.com/", | ||
| "frameUrl": "https://www.w3schools.com/", | ||
| "frameIdPath": "0.0", | ||
| "xpath": "id(\"subtopnav\")/a[3]", | ||
| "cssSelector": "a.ga-nav[href=\"/js/default.asp\"][title*=\"JavaScript Tutorial\"]", | ||
| "elementTag": "A", | ||
| "elementText": "JAVASCRIPT" | ||
| }, | ||
| { | ||
| "type": "navigation", | ||
| "timestamp": 1754866242778, | ||
| "tabId": 388342781, | ||
| "url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe" | ||
| }, | ||
| { | ||
| "type": "click", | ||
| "timestamp": 1754866242882, | ||
| "tabId": 388342781, | ||
| "url": "https://www.w3schools.com/js/default.asp", | ||
| "frameUrl": "https://www.w3schools.com/js/default.asp", | ||
| "frameIdPath": "0.0", | ||
| "xpath": "id(\"subtopnav\")/a[1]", | ||
| "cssSelector": "a.ga-nav.subtopnav_firstitem[href=\"/html/default.asp\"][title*=\"HTML Tutorial\"]", | ||
| "elementTag": "A", | ||
| "elementText": "HTML" | ||
| }, | ||
| { | ||
| "type": "navigation", | ||
| "timestamp": 1754866246395, | ||
| "tabId": 388342781, | ||
| "url": "https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe" | ||
| } | ||
| ] | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,7 @@ | |
| from browser_use import Browser | ||
| from browser_use.browser.profile import BrowserProfile | ||
| from fastapi import FastAPI | ||
| from patchright.async_api import async_playwright as patchright_async_playwright | ||
|
|
||
| # Assuming views.py is correctly located for this import path | ||
| from workflow_use.recorder.views import ( | ||
|
|
@@ -84,7 +85,32 @@ async def _capture_and_signal_final_workflow(self, trigger_reason: str): | |
| async with self.final_workflow_processed_lock: | ||
| if not self.final_workflow_processed_flag and self.last_workflow_update_event: | ||
| print(f'[Service] Capturing final workflow (Trigger: {trigger_reason}).') | ||
| self.final_workflow_output = self.last_workflow_update_event.payload | ||
| wf = self.last_workflow_update_event.payload | ||
| # Backend safety filter: drop about:blank and obvious ad/analytics iframe navigations | ||
| try: | ||
| clean_steps = [] | ||
| for s in wf.steps: | ||
| st = getattr(s, 'type', None) or (s.get('type') if isinstance(s, dict) else None) | ||
| url = getattr(s, 'url', None) or (s.get('url') if isinstance(s, dict) else None) | ||
sauravpanda marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if st == 'navigation': | ||
| if not url or url == 'about:blank': | ||
| continue | ||
| from urllib.parse import urlparse | ||
| host = urlparse(url).hostname or '' | ||
| blocked = any( | ||
| pat in host for pat in ( | ||
| 'doubleclick.net', 'googlesyndication.com', 'googleadservices.com', | ||
| 'amazon-adsystem.com', '2mdn.net', 'recaptcha.google.com', 'recaptcha.net', | ||
| 'googletagmanager.com', 'indexww.com', 'adtrafficquality.google' | ||
| ) | ||
| ) | ||
|
Comment on lines +104 to +110
|
||
| if blocked: | ||
| continue | ||
| clean_steps.append(s) | ||
| wf.steps = clean_steps | ||
| except Exception as e: | ||
| print(f'[Service] Backend filter failed: {e}') | ||
| self.final_workflow_output = wf | ||
| self.final_workflow_processed_flag = True | ||
| processed_this_call = True | ||
|
|
||
|
|
@@ -96,7 +122,7 @@ async def _capture_and_signal_final_workflow(self, trigger_reason: str): | |
| if trigger_reason == 'RecordingStoppedEvent' and self.browser: | ||
| print('[Service] Attempting to close browser due to RecordingStoppedEvent...') | ||
| try: | ||
| await self.browser.stop() | ||
| await self.browser.close() | ||
| print('[Service] Browser close command issued.') | ||
| except Exception as e_close: | ||
| print(f'[Service] Error closing browser on recording stop: {e_close}') | ||
|
|
@@ -127,7 +153,8 @@ async def _launch_browser_and_wait(self): | |
| ) | ||
|
|
||
| # Create and configure browser | ||
| self.browser = Browser(browser_profile=profile) | ||
| playwright = await patchright_async_playwright().start() | ||
| self.browser = Browser(browser_profile=profile, playwright=playwright) | ||
|
|
||
| print('[Service] Starting browser with extensions...') | ||
| await self.browser.start() | ||
|
|
@@ -150,7 +177,7 @@ async def _launch_browser_and_wait(self): | |
| print('[Service] Browser task cancelled.') | ||
| if self.browser: | ||
| try: | ||
| await self.browser.stop() | ||
| await self.browser.close() | ||
| except: | ||
| pass # Best effort | ||
| raise # Re-raise to be caught by gather | ||
|
|
@@ -218,7 +245,7 @@ async def capture_workflow(self) -> Optional[WorkflowDefinitionSchema]: | |
| print('[Service] Ensuring browser is closed in cleanup...') | ||
| try: | ||
| self.browser.browser_profile.keep_alive = False | ||
| await self.browser.stop() | ||
| await self.browser.close() | ||
| except Exception as e_browser_close: | ||
| print(f'[Service] Error closing browser in final cleanup: {e_browser_close}') | ||
| # self.browser = None | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We have removed the Playwright dependency from the Browser Use library, so we would prefer not to use Playwright here if possible. Can you achieve this using Browser Use actions instead?