diff --git a/extension/src/entrypoints/content.ts b/extension/src/entrypoints/content.ts index 8f7f233f..bfa81bdf 100644 --- a/extension/src/entrypoints/content.ts +++ b/extension/src/entrypoints/content.ts @@ -544,6 +544,8 @@ function handleBlur(event: FocusEvent) { export default defineContentScript({ matches: [""], + allFrames: true, + matchAboutBlank: true, main(ctx) { // Listener for status updates from the background script chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { diff --git a/workflows/workflow_use/builder/service.py b/workflows/workflow_use/builder/service.py index b260ce5a..b855ec2a 100644 --- a/workflows/workflow_use/builder/service.py +++ b/workflows/workflow_use/builder/service.py @@ -187,15 +187,20 @@ async def build_workflow( images_used = 0 for step in input_workflow.steps: step_messages: List[Dict[str, Any]] = [] # Messages for this specific step + + step_dict = step.model_dump(mode='json', exclude_none=True) + step_type = getattr(step, 'type', step_dict.get('type')) + step_url = getattr(step, 'url', step_dict.get('url', '')) + # Skip steps to avoid processing empty or irrelevant navigation steps, mostly from iframes. + if step_type == 'navigation' and step_url == 'about:blank': + continue # 1. Text representation (JSON dump) - step_dict = step.model_dump(mode='json', exclude_none=True) screenshot_data = step_dict.pop('screenshot', None) # Pop potential screenshot step_messages.append({'type': 'text', 'text': json.dumps(step_dict, indent=2)}) # 2. Optional screenshot - attach_image = use_screenshots and images_used < max_images - step_type = getattr(step, 'type', step_dict.get('type')) + attach_image = use_screenshots and images_used < max_images if attach_image and step_type != 'input': # Don't attach for inputs # Re-retrieve screenshot data if it wasn't popped (e.g., nested under 'data')