diff --git a/extension/src/entrypoints/background.ts b/extension/src/entrypoints/background.ts index 19928f2..ff4f175 100644 --- a/extension/src/entrypoints/background.ts +++ b/extension/src/entrypoints/background.ts @@ -6,7 +6,6 @@ import { StoredCustomKeyEvent, StoredEvent, StoredRrwebEvent, - StoredExtractionEvent, } from "../lib/types"; import { ClickStep, @@ -16,7 +15,6 @@ import { ScrollStep, Step, Workflow, - ExtractStep, } from "../lib/workflow-types"; import { HttpEvent, @@ -32,9 +30,21 @@ export default defineBackground(() => { // Store tab information (URL, potentially title) const tabInfo: { [tabId: number]: { url?: string; title?: string } } = {}; - // Track recent user interactions to distinguish intentional vs side-effect navigation - const recentUserInteractions: { [tabId: number]: number } = {}; // timestamp of last user interaction - const USER_INTERACTION_TIMEOUT = 5000; // 5 seconds (increased for testing) + // Track which tabs have been explicitly activated (brought to foreground) by the user. + // We will ignore events originating from tabs that were never activated to reduce noise + // (for example: ad / tracker tabs that load in the background). + const activatedTabs = new Set(); + + // Track user clicks that are likely to open a new tab (Ctrl/Cmd + click, target=_blank etc.). + // Content scripts will send a PREPARE_NEW_TAB signal; we keep timestamp to correlate + // shortly following chrome.tabs.onCreated events so we can mark those tabs as user initiated. + const recentNewTabIntents: { [openerTabId: number]: number } = {}; + // Record iframe URLs that the user actually interacted with (via custom events) per tab + const interactedFrameUrls: Record> = {}; + // Additionally track last interaction time per frame for time-window gating + const interactedFrameTimes: Record> = {}; + // Heuristic window (ms) within which a created tab following a user intent is considered relevant. + const NEW_TAB_INTENT_WINDOW_MS = 4000; let isRecordingEnabled = true; // Default to disabled (OFF) let lastWorkflowHash: string | null = null; // Cache for the last logged workflow hash @@ -69,100 +79,83 @@ export default defineBackground(() => { } } - // Function to generate step descriptions for semantic workflows - function generateStepDescription(step: Step): string | null { - switch (step.type) { - case "click": - return "Click element"; - case "input": - return "Input element"; - case "navigation": - return `Navigate to ${step.url}`; - case "scroll": - return null; // Scroll steps will have null description like in the example - case "key_press": - return "Key press element"; - case "extract": - return "Extract information with AI"; - default: - return "Unknown action"; - } - } - // Function to broadcast workflow data updates to the console bus async function broadcastWorkflowDataUpdate(): Promise { + await settingsReady; // console.log("[DEBUG] broadcastWorkflowDataUpdate: Entered function"); // Optional: Keep for debugging - const allSteps: Step[] = Object.keys(sessionLogs) + const rawSteps: Step[] = Object.keys(sessionLogs) .flatMap((tabIdStr) => { const tabId = parseInt(tabIdStr, 10); return convertStoredEventsToSteps(sessionLogs[tabId] || []); }) .sort((a, b) => a.timestamp - b.timestamp); // Sort chronologically - console.log(`πŸ”„ Processing ${allSteps.length} steps for workflow update`); - const extractionSteps = allSteps.filter(s => (s as any).type === 'extract'); - console.log(`πŸ€– Found ${extractionSteps.length} extraction steps:`, extractionSteps); - - // Convert steps to semantic format with proper descriptions - const semanticSteps = allSteps.map((step, index) => { - const semanticStep: any = { - ...step, - description: generateStepDescription(step), - }; - - // Remove internal fields that shouldn't be in the final workflow - delete semanticStep.timestamp; - delete semanticStep.tabId; - delete semanticStep.frameUrl; - delete semanticStep.xpath; - delete semanticStep.elementTag; - delete semanticStep.elementText; - delete semanticStep.screenshot; - - // Handle different step types specifically - if (step.type === "scroll") { - delete semanticStep.targetId; - // Keep scrollX and scrollY for scroll steps - } else if (step.type === "extract") { - // For extraction steps, preserve extractionGoal and url - // Keep: extractionGoal, url, type, description - // Already removed: timestamp, tabId, screenshot (these are correct to remove) - console.log(`πŸ€– Processing extraction step:`, semanticStep); + // Post-process to collapse consecutive duplicates that only differ by timestamp (e.g. repeated identical navigations) + const allSteps: Step[] = []; + for (const step of rawSteps) { + const last = allSteps.length ? allSteps[allSteps.length - 1] : null; + if (!last) { + allSteps.push(step); + continue; } - - // Convert targetText to target_text for semantic workflow compatibility - if (semanticStep.targetText) { - semanticStep.target_text = semanticStep.targetText; - delete semanticStep.targetText; + let isDuplicate = false; + if (last.type === step.type) { + switch (step.type) { + case 'navigation': + isDuplicate = (last as NavigationStep).url === (step as NavigationStep).url && last.tabId === step.tabId; + break; + case 'input': + isDuplicate = + last.tabId === step.tabId && + (last as any).url === (step as any).url && + (last as any).frameUrl === (step as any).frameUrl && + (last as any).xpath === (step as any).xpath && + (last as any).elementTag === (step as any).elementTag && + (last as any).value === (step as any).value; + break; + case 'click': + isDuplicate = + last.tabId === step.tabId && + (last as any).url === (step as any).url && + (last as any).frameUrl === (step as any).frameUrl && + (last as any).xpath === (step as any).xpath && + (last as any).elementTag === (step as any).elementTag && + (last as any).elementText === (step as any).elementText; + break; + case 'scroll': { + const sameXY = (last as any).scrollX === (step as any).scrollX && (last as any).scrollY === (step as any).scrollY; + const sameUrl = (last as any).url === (step as any).url; + const nearTime = Math.abs(step.timestamp - last.timestamp) < 200; + isDuplicate = last.tabId === step.tabId && sameXY && sameUrl && nearTime; + break; + } + case 'key_press': + isDuplicate = + last.tabId === step.tabId && + (last as any).url === (step as any).url && + (last as any).key === (step as any).key && + (last as any).xpath === (step as any).xpath; + break; + } + } + if (isDuplicate) { + // Update timestamp (and screenshot if present) to most recent but don't add new step + last.timestamp = step.timestamp; + if ((step as any).screenshot) { + (last as any).screenshot = (step as any).screenshot; + } } else { - // Ensure target_text field exists (set to null if no semantic text available) - semanticStep.target_text = null; + allSteps.push(step); } - - return semanticStep; - }); - - const semanticExtractionSteps = semanticSteps.filter(s => s.type === 'extract'); - console.log(`βœ… Final semantic steps include ${semanticExtractionSteps.length} extraction steps:`, semanticExtractionSteps); - - // Create the workflowData object for the Python server (semantic format) - const semanticWorkflowData: Workflow = { - workflow_analysis: "Semantic version of recorded workflow. Uses visible text to identify elements instead of CSS selectors for improved reliability.", - name: "Recorded Workflow (Semantic)", - description: `Recorded on ${new Date().toLocaleString()}`, - version: "1.0", - input_schema: [], - steps: semanticSteps, // Use processed semantic steps - }; + } - // Create the workflowData object for the UI (with original targetText) - const uiWorkflowData: Workflow = { - workflow_analysis: "Semantic version of recorded workflow. Uses visible text to identify elements instead of CSS selectors for improved reliability.", - name: "Recorded Workflow (Semantic)", + // Create the workflowData object *after* sorting steps, but hash only steps + const workflowData: Workflow = { + name: "Recorded Workflow", description: `Recorded on ${new Date().toLocaleString()}`, - version: "1.0", + version: "1.0.0", input_schema: [], - steps: allSteps, // Use original steps with targetText for UI + steps: allSteps, // allSteps is used here }; const allStepsString = JSON.stringify(allSteps); // Hash based on allSteps @@ -173,20 +166,20 @@ export default defineBackground(() => { // Condition to skip logging if the hash of steps is the same if (lastWorkflowHash !== null && currentWorkflowHash === lastWorkflowHash) { // console.log("[DEBUG] broadcastWorkflowDataUpdate: Steps unchanged, skipping log."); // Optional - return uiWorkflowData; + return workflowData; } lastWorkflowHash = currentWorkflowHash; - // console.log("[DEBUG] broadcastWorkflowDataUpdate: Steps changed, workflowData object:", JSON.parse(JSON.stringify(uiWorkflowData))); // Optional + // console.log("[DEBUG] broadcastWorkflowDataUpdate: Steps changed, workflowData object:", JSON.parse(JSON.stringify(workflowData))); // Optional - // Send semantic workflow update to Python server + // Send workflow update to Python server const eventToSend: HttpWorkflowUpdateEvent = { type: "WORKFLOW_UPDATE", timestamp: Date.now(), - payload: semanticWorkflowData, // Send semantic format to server + payload: workflowData, }; sendEventToServer(eventToSend); - return uiWorkflowData; // Return UI format to extension + return workflowData; } // Function to broadcast the recording status to all content scripts and sidepanel @@ -227,6 +220,16 @@ export default defineBackground(() => { console.log(`Sending ${type}:`, payload); const tabId = payload.tabId; if (tabId) { + // Skip capturing events for tabs that have never been activated AND are not the original opener + // unless we have positively identified them as a recent user initiated tab (click intent -> creation). + if ( + type !== "CUSTOM_TAB_ACTIVATED" && + !activatedTabs.has(tabId) && + !(payload.openerTabId && recentNewTabIntents[payload.openerTabId] && Date.now() - recentNewTabIntents[payload.openerTabId] < NEW_TAB_INTENT_WINDOW_MS) + ) { + // Silently ignore background noise (ad/tracker tabs) until user actually focuses them. + return; + } if (!sessionLogs[tabId]) { sessionLogs[tabId] = []; } @@ -254,6 +257,12 @@ export default defineBackground(() => { url: tab.pendingUrl || tab.url, windowId: tab.windowId, index: tab.index, + userInitiated: + !!( + tab.openerTabId && + recentNewTabIntents[tab.openerTabId] && + Date.now() - recentNewTabIntents[tab.openerTabId] < NEW_TAB_INTENT_WINDOW_MS + ), }); }); @@ -271,6 +280,7 @@ export default defineBackground(() => { }); chrome.tabs.onActivated.addListener((activeInfo) => { + activatedTabs.add(activeInfo.tabId); sendTabEvent("CUSTOM_TAB_ACTIVATED", { tabId: activeInfo.tabId, windowId: activeInfo.windowId, @@ -295,119 +305,134 @@ export default defineBackground(() => { // --- Conversion Function --- + const DEFAULT_SETTINGS = { + enableIframes: true as boolean, + iframeWindow: 3000 as number, + blocklist: [ + 'doubleclick.net','googlesyndication.com','googleadservices.com', + 'amazon-adsystem.com','2mdn.net','recaptcha.google.com','recaptcha.net', + 'googletagmanager.com','indexww.com','adtrafficquality.google','gstaticadssl.googleapis.com' + ] as string[], + allowlist: [] as string[], + }; + let settings: { enableIframes: boolean; iframeWindow: number; blocklist: string[]; allowlist: string[] } = { ...DEFAULT_SETTINGS }; + const settingsReady = new Promise((resolve) => { + chrome.storage.sync.get(DEFAULT_SETTINGS, (s: any) => { + settings = { ...DEFAULT_SETTINGS, ...s }; + resolve(); + }); + }); + chrome.storage.onChanged.addListener((changes, area) => { + if (area !== 'sync') return; + const next = { ...settings } as any; + for (const k of Object.keys(changes)) (next as any)[k] = (changes as any)[k].newValue; + settings = next; + }); + function convertStoredEventsToSteps(events: StoredEvent[]): Step[] { const steps: Step[] = []; + const lastNavigationIndexByTab: Record = {}; + const lastInputPerKey: Record = {}; for (const event of events) { switch (event.messageType) { - case "CUSTOM_CLICK_EVENT": { - const clickEvent = event as StoredCustomClickEvent; - // Ensure required fields are present, even if optional in source type for some reason + case "CUSTOM_TAB_CREATED": + case "CUSTOM_TAB_UPDATED": + case "CUSTOM_TAB_ACTIVATED": { + const navUrl = (event as any).url || (event as any).changeInfo?.url; + if (!navUrl) break; + const tabId = (event as any).tabId; + const userInitiated = (event as any).userInitiated; + if (!activatedTabs.has(tabId) && !userInitiated) break; // suppress background noise + + const existingIdx = lastNavigationIndexByTab[tabId]; if ( - clickEvent.url && - clickEvent.frameUrl && - clickEvent.xpath && - clickEvent.elementTag + existingIdx !== undefined && + steps[existingIdx] && + steps[existingIdx].type === "navigation" ) { + // Update existing navigation (redirect / title change) + (steps[existingIdx] as NavigationStep).url = navUrl; + steps[existingIdx].timestamp = event.timestamp; + } else { + const nav: NavigationStep = { + type: "navigation", + timestamp: event.timestamp, + tabId, + url: navUrl, + }; + steps.push(nav); + lastNavigationIndexByTab[tabId] = steps.length - 1; + } + break; + } + case "CUSTOM_CLICK_EVENT": { + const click = event as StoredCustomClickEvent; + if (click.url && click.xpath && click.elementTag) { const step: ClickStep = { type: "click", - timestamp: clickEvent.timestamp, - tabId: clickEvent.tabId, - url: clickEvent.url, - frameUrl: clickEvent.frameUrl, - xpath: clickEvent.xpath, - elementTag: clickEvent.elementTag, - elementText: clickEvent.elementText, - screenshot: clickEvent.screenshot, + timestamp: click.timestamp, + tabId: click.tabId, + url: click.url, + frameUrl: click.frameUrl, + frameIdPath: (click as any).frameIdPath, + xpath: click.xpath, + cssSelector: click.cssSelector, + elementTag: click.elementTag, + elementText: click.elementText, + targetText: (click as any).targetText, + screenshot: click.screenshot, }; - - // Prioritize target_text for semantic workflows, but include cssSelector for complex elements - if (clickEvent.targetText && clickEvent.targetText.trim()) { - step.targetText = clickEvent.targetText; - - // For radio buttons, checkboxes, and complex interactive elements, also include cssSelector - if (clickEvent.cssSelector && - (clickEvent.cssSelector.includes('radio') || - clickEvent.cssSelector.includes('checkbox') || - clickEvent.cssSelector.includes('role="radio"') || - clickEvent.cssSelector.includes('role="checkbox"') || - clickEvent.elementTag.toLowerCase() === 'button')) { - step.cssSelector = clickEvent.cssSelector; - } - } else if (clickEvent.cssSelector) { - step.cssSelector = clickEvent.cssSelector; - } - steps.push(step); } else { - console.warn("Skipping incomplete CUSTOM_CLICK_EVENT:", clickEvent); + console.warn("Skipping incomplete CUSTOM_CLICK_EVENT", click); } break; } - case "CUSTOM_INPUT_EVENT": { const inputEvent = event as StoredCustomInputEvent; - if ( - inputEvent.url && - // inputEvent.frameUrl && // frameUrl might be null/undefined in some cases, let's allow merging if only one is present or both match - inputEvent.xpath && - inputEvent.elementTag - ) { + if (inputEvent.url && inputEvent.xpath && inputEvent.elementTag) { + const key = `${inputEvent.tabId}|${inputEvent.xpath}`; + const prior = lastInputPerKey[key]; + const nowTs = inputEvent.timestamp; + const isEmpty = (inputEvent as any).value === ""; + if (isEmpty && prior && prior.value === "" && nowTs - prior.ts < 5000) { + // collapse rapid-fire repeated empties + steps[prior.idx].timestamp = nowTs; + break; + } const lastStep = steps.length > 0 ? steps[steps.length - 1] : null; - - // Check if the last step was a mergeable input event if ( lastStep && lastStep.type === "input" && lastStep.tabId === inputEvent.tabId && lastStep.url === inputEvent.url && - lastStep.frameUrl === inputEvent.frameUrl && // Ensure frameUrls match if both exist + lastStep.frameUrl === inputEvent.frameUrl && lastStep.xpath === inputEvent.xpath && - ((lastStep as InputStep).targetText === inputEvent.targetText || - (lastStep as InputStep).cssSelector === inputEvent.cssSelector) && + lastStep.cssSelector === inputEvent.cssSelector && lastStep.elementTag === inputEvent.elementTag ) { - // Update the last input step (lastStep as InputStep).value = inputEvent.value; - lastStep.timestamp = inputEvent.timestamp; // Update to latest timestamp - (lastStep as InputStep).screenshot = inputEvent.screenshot; // Update to latest screenshot - - // Update semantic targeting if available - if (inputEvent.targetText && inputEvent.targetText.trim()) { - (lastStep as InputStep).targetText = inputEvent.targetText; - delete (lastStep as InputStep).cssSelector; // Remove cssSelector when we have targetText - } + lastStep.timestamp = inputEvent.timestamp; + (lastStep as InputStep).screenshot = inputEvent.screenshot; + lastInputPerKey[key] = { idx: steps.length - 1, ts: nowTs, value: (inputEvent as any).value }; } else { - // Add a new input step const newStep: InputStep = { type: "input", timestamp: inputEvent.timestamp, tabId: inputEvent.tabId, url: inputEvent.url, frameUrl: inputEvent.frameUrl, + frameIdPath: (inputEvent as any).frameIdPath, xpath: inputEvent.xpath, + cssSelector: inputEvent.cssSelector, elementTag: inputEvent.elementTag, value: inputEvent.value, + targetText: (inputEvent as any).targetText, screenshot: inputEvent.screenshot, }; - - // Prioritize target_text for semantic workflows, but include cssSelector for complex elements - if (inputEvent.targetText && inputEvent.targetText.trim()) { - newStep.targetText = inputEvent.targetText; - - // For radio buttons, checkboxes, and complex input elements, also include cssSelector - if (inputEvent.cssSelector && - (inputEvent.cssSelector.includes('radio') || - inputEvent.cssSelector.includes('checkbox') || - inputEvent.cssSelector.includes('role="radio"') || - inputEvent.cssSelector.includes('role="checkbox"'))) { - newStep.cssSelector = inputEvent.cssSelector; - } - } else if (inputEvent.cssSelector) { - newStep.cssSelector = inputEvent.cssSelector; - } - steps.push(newStep); + lastInputPerKey[key] = { idx: steps.length - 1, ts: nowTs, value: (inputEvent as any).value }; } } else { console.warn("Skipping incomplete CUSTOM_INPUT_EVENT:", inputEvent); @@ -426,6 +451,7 @@ export default defineBackground(() => { tabId: keyEvent.tabId, url: keyEvent.url, frameUrl: keyEvent.frameUrl, // Can be missing + frameIdPath: (keyEvent as any).frameIdPath, key: keyEvent.key, xpath: keyEvent.xpath, cssSelector: keyEvent.cssSelector, @@ -452,14 +478,21 @@ export default defineBackground(() => { y: number; }; // Type assertion for clarity const currentTabInfo = tabInfo[rrEvent.tabId]; // Get associated tab info for URL - + // Drop internal chrome pages like chrome://newtab/ + if (currentTabInfo?.url?.startsWith('chrome://')) { + break; + } // Check if the last step added was a mergeable scroll event const lastStep = steps.length > 0 ? steps[steps.length - 1] : null; if ( lastStep && lastStep.type === "scroll" && lastStep.tabId === rrEvent.tabId && - (lastStep as ScrollStep).targetId === scrollData.id + // Treat same XY within a short time window as duplicate, regardless of targetId + (lastStep as ScrollStep).scrollX === scrollData.x && + (lastStep as ScrollStep).scrollY === scrollData.y && + Math.abs(rrEvent.timestamp - lastStep.timestamp) < 200 && + (lastStep as any).url === currentTabInfo?.url ) { // Update the last scroll step (lastStep as ScrollStep).scrollX = scrollData.x; @@ -479,38 +512,53 @@ export default defineBackground(() => { }; steps.push(newStep); } - } else if ((rrEvent.type === EventType.Meta || rrEvent.type === EventType.FullSnapshot) && rrEvent.data?.href) { - // Handle rrweb meta and fullsnapshot events as navigation (filtering now happens at storage level) + } else if (rrEvent.type === EventType.Meta && rrEvent.data?.href) { + // Also handle rrweb meta events as navigation const metaData = rrEvent.data as { href: string }; + const href = metaData.href; + // Drop about:blank always + if (href === 'about:blank') { + break; + } + try { + const urlObj = new URL(href); + const host = urlObj.hostname; + // Allowlist overrides blocklist + const inAllow = settings.allowlist.some(d => host.endsWith(d)); + const inBlock = settings.blocklist.some(d => host.endsWith(d)); + if (!inAllow && inBlock) { + break; + } + if (!settings.enableIframes && !(rrEvent as any).isTopFrame) { + break; // user disabled iframe recording + } + // If top frame, allow + if ((rrEvent as any).isTopFrame) { + // allowed + } else { + const fUrl = (rrEvent as any).frameUrl as string | undefined; + if (!fUrl) break; + const times = interactedFrameTimes[rrEvent.tabId] || {}; + const lastTs = times[fUrl]; + if (!lastTs) break; + const eventTimestamp = typeof (rrEvent as any).timestamp === "number" ? (rrEvent as any).timestamp : Date.now(); + if (eventTimestamp - lastTs > settings.iframeWindow) break; + } + } catch { + break; + } const step: NavigationStep = { type: "navigation", timestamp: rrEvent.timestamp, tabId: rrEvent.tabId, url: metaData.href, + // frameIdPath could be attached if needed }; steps.push(step); } break; } - case "EXTRACTION_STEP": { - const extractEvent = event as any; // Type assertion for extraction event - if (extractEvent.url && extractEvent.extractionGoal) { - const step: ExtractStep = { - type: "extract", - timestamp: extractEvent.timestamp, - tabId: extractEvent.tabId, - url: extractEvent.url, - extractionGoal: extractEvent.extractionGoal, - screenshot: extractEvent.screenshot, - }; - steps.push(step); - } else { - console.warn("Skipping incomplete EXTRACTION_STEP:", extractEvent); - } - break; - } - // Add cases for other StoredEvent types to Step types if needed // e.g., CUSTOM_SELECT_EVENT -> SelectStep // e.g., CUSTOM_TAB_CREATED -> TabCreatedStep @@ -537,6 +585,8 @@ export default defineBackground(() => { "CUSTOM_INPUT_EVENT", "CUSTOM_SELECT_EVENT", "CUSTOM_KEY_EVENT", + // Synthetic event we will emit from content script just before an expected new tab open. + "PREPARE_NEW_TAB", ]; if ( message.type === "RRWEB_EVENT" || @@ -553,6 +603,13 @@ export default defineBackground(() => { const tabId = sender.tab.id; const isCustomEvent = customEventTypes.includes(message.type); + // Record intent for new tab opening to correlate with onCreated event. + if (message.type === "PREPARE_NEW_TAB") { + recentNewTabIntents[sender.tab.id] = Date.now(); + // We do not store this as a workflow step; it's only heuristic metadata. + return false; + } + // Function to store the event const storeEvent = (eventPayload: any, screenshotDataUrl?: string) => { if (!sessionLogs[tabId]) { @@ -568,37 +625,6 @@ export default defineBackground(() => { tabInfo[tabId].title = sender.tab.title; } - // Track user interactions for navigation filtering - if (customEventTypes.includes(message.type)) { - recentUserInteractions[tabId] = eventPayload.timestamp || Date.now(); - console.log(`[NAV_FILTER] Tracked ${message.type} on tab ${tabId} at ${recentUserInteractions[tabId]}`); - } - - // Log all rrweb events for debugging - if (message.type === "RRWEB_EVENT") { - console.log(`[NAV_FILTER] RRWEB event type ${eventPayload.type} (Meta=${EventType.Meta})`, eventPayload.data); - } - - // Filter out side-effect navigation from rrweb meta and fullsnapshot events - if (message.type === "RRWEB_EVENT" && - (eventPayload.type === EventType.Meta || eventPayload.type === EventType.FullSnapshot) && - eventPayload.data?.href) { - const lastUserInteraction = recentUserInteractions[tabId] || 0; - const currentTime = eventPayload.timestamp || Date.now(); - const timeSinceLastInteraction = currentTime - lastUserInteraction; - - // Check if this is the first event in the session (initial page load) - const isFirstEvent = !sessionLogs[tabId] || sessionLogs[tabId].length === 0; - - // Only store navigation if it's the first event (initial page load) or no user interaction has happened - if (lastUserInteraction === 0 || isFirstEvent) { - console.log(`[NAV_FILTER] STORING navigation: ${eventPayload.data.href} (lastInteraction: ${lastUserInteraction}, isFirst: ${isFirstEvent})`); - } else { - console.log(`[NAV_FILTER] FILTERED navigation: ${eventPayload.data.href} (${timeSinceLastInteraction}ms after interaction - always filter post-interaction navigation)`); - return; // Don't store this event - } - } - const eventWithMeta = { ...eventPayload, tabId: tabId, @@ -606,6 +632,14 @@ export default defineBackground(() => { screenshot: screenshotDataUrl, }; sessionLogs[tabId].push(eventWithMeta); + // Mark frame as interacted so subsequent iframe meta navigations can be allowed + if (message.type.startsWith("CUSTOM_") && eventPayload.frameUrl) { + if (!interactedFrameUrls[tabId]) interactedFrameUrls[tabId] = new Set(); + interactedFrameUrls[tabId].add(eventPayload.frameUrl); + if (!interactedFrameTimes[tabId]) interactedFrameTimes[tabId] = {}; + const interactionTimestamp = typeof eventPayload.timestamp === "number" ? eventPayload.timestamp : Date.now(); + interactedFrameTimes[tabId][eventPayload.frameUrl] = interactionTimestamp; + } broadcastWorkflowDataUpdate(); // Call is async, will not block // console.log(`Stored ${message.type} from tab ${tabId}`); }; @@ -663,9 +697,6 @@ export default defineBackground(() => { (key) => delete sessionLogs[parseInt(key)] ); Object.keys(tabInfo).forEach((key) => delete tabInfo[parseInt(key)]); - Object.keys(recentUserInteractions).forEach( - (key) => delete recentUserInteractions[parseInt(key)] - ); console.log("Cleared previous recording data."); // Start recording @@ -700,83 +731,6 @@ export default defineBackground(() => { } sendResponse({ status: "stopped" }); // Send simple confirmation } - // --- Add Extraction Step from Sidepanel --- - else if (message.type === "ADD_EXTRACTION_STEP") { - console.log("πŸ€– Received ADD_EXTRACTION_STEP request:", message.payload); - - if (!isRecordingEnabled) { - console.error("❌ Recording is not enabled"); - sendResponse({ status: "error", message: "Recording is not active" }); - return false; - } - - try { - // For sidepanel messages, we need to get the active tab - // Since this is from sidepanel, sender.tab will be undefined - // Let's use a direct approach with chrome.tabs.query but handle it synchronously - - isAsync = true; - - chrome.tabs.query({ active: true, currentWindow: true }, (tabs) => { - try { - console.log("πŸ“‹ Active tabs found:", tabs?.length || 0); - - if (chrome.runtime.lastError) { - console.error("❌ Chrome tabs query error:", chrome.runtime.lastError); - sendResponse({ status: "error", message: "Chrome tabs query failed" }); - return; - } - - if (!tabs || tabs.length === 0 || !tabs[0]?.id) { - console.error("❌ No active tab found"); - sendResponse({ status: "error", message: "No active tab found" }); - return; - } - - const tabId = tabs[0].id; - const tabUrl = tabs[0].url || ""; - - console.log("βœ… Using tab ID:", tabId, "URL:", tabUrl); - - const extractionStep: StoredExtractionEvent = { - timestamp: message.payload.timestamp, - tabId: tabId, - url: tabUrl, - extractionGoal: message.payload.extractionGoal, - messageType: "EXTRACTION_STEP", - }; - - console.log("πŸ“ Creating extraction step:", extractionStep); - - if (!sessionLogs[tabId]) { - console.log("πŸ†• Initializing sessionLogs for tab:", tabId); - sessionLogs[tabId] = []; - } - - sessionLogs[tabId].push(extractionStep); - console.log("βœ… Added extraction step to sessionLogs. Total events for tab:", sessionLogs[tabId].length); - - // Broadcast update (don't await to avoid blocking) - broadcastWorkflowDataUpdate(); - console.log("βœ… Broadcasted workflow update"); - - // Send success response - sendResponse({ status: "added" }); - - } catch (error) { - console.error("❌ Error in tabs.query callback:", error); - sendResponse({ status: "error", message: `Callback error: ${error}` }); - } - }); - - return true; // Keep message channel open - - } catch (error) { - console.error("❌ Error setting up extraction step:", error); - sendResponse({ status: "error", message: `Setup error: ${error}` }); - return false; - } - } // --- Status Request from Content Script --- else if (message.type === "REQUEST_RECORDING_STATUS" && sender.tab?.id) { console.log( diff --git a/extension/src/entrypoints/content.ts b/extension/src/entrypoints/content.ts index 08bef61..734b468 100644 --- a/extension/src/entrypoints/content.ts +++ b/extension/src/entrypoints/content.ts @@ -64,6 +64,102 @@ const SAFE_ATTRIBUTES = new Set([ "data-testid", ]); +function computeFrameIdPath(): string { + try { + let current: Window & typeof globalThis = window; + const parts: number[] = []; + while (current !== current.parent) { + const parent = current.parent; + let idx = 0; + for (let i = 0; i < parent.frames.length; i++) { + if (parent.frames[i] === current) { + idx = i; + break; + } + } + parts.unshift(idx); + current = parent; + if (parts.length > 10) break; + } + return parts.length ? parts.join('.') : '0'; + } catch { + return '0'; + } +} + + + + + +function getLabelText(element: HTMLElement): string | undefined { + const id = (element as HTMLInputElement).id; + if (id) { + try { + const label = document.querySelector(`label[for="${CSS.escape(id)}"]`); + const labelText = label?.textContent?.trim(); + if (labelText) { + return labelText.slice(0, 200); + } + } catch (error) { + console.warn("Failed to query label by id", error); + } + } + let current: HTMLElement | null = element; + while (current && current !== document.body) { + if (current.tagName.toLowerCase() === "label") { + const labelText = current.textContent?.trim(); + if (labelText) { + return labelText.slice(0, 200); + } + } + current = current.parentElement; + } + return undefined; +} + +function getTextFromAriaLabelledBy(element: HTMLElement): string | undefined { + const ids = element.getAttribute("aria-labelledby"); + if (!ids) return undefined; + const parts = ids.split(/\s+/).filter(Boolean); + const texts: string[] = []; + for (const id of parts) { + const ref = document.getElementById(id); + const text = ref?.textContent?.trim(); + if (text) { + texts.push(text); + } + } + if (!texts.length) { + return undefined; + } + return texts.join(" ").slice(0, 200); +} + +function computeTargetText(element: HTMLElement): string | undefined { + const tagName = element.tagName.toLowerCase(); + const inputType = (element as HTMLInputElement).type?.toLowerCase(); + const labelText = getLabelText(element); + const ariaLabel = element.getAttribute("aria-label")?.trim(); + const ariaLabelledBy = getTextFromAriaLabelledBy(element); + const placeholder = element.getAttribute("placeholder")?.trim(); + const title = element.getAttribute("title")?.trim(); + const valueAttr = (element as HTMLInputElement).value?.trim(); + const textContent = element.textContent?.trim(); + const candidates: (string | undefined)[] = []; + if (tagName === "button" || (tagName === "input" && ["button", "submit"].includes(inputType || ""))) { + candidates.push(textContent, labelText, ariaLabel, ariaLabelledBy, title, valueAttr); + } else if (["input", "textarea", "select"].includes(tagName)) { + candidates.push(labelText, placeholder, ariaLabel, ariaLabelledBy, title, valueAttr, textContent); + } else { + candidates.push(labelText, ariaLabel, ariaLabelledBy, textContent, title); + } + for (const candidate of candidates) { + if (candidate && candidate.trim().length > 0) { + return candidate.trim().slice(0, 200); + } + } + return undefined; +} function getEnhancedCSSSelector(element: HTMLElement, xpath: string): string { try { // Base selector from simplified XPath or just tagName @@ -122,6 +218,10 @@ function startRecorder() { emit(event) { if (!isRecordingActive) return; + const frameUrl = window.location.href; + const isTopFrame = window.self === window.top; + const frameIdPath = computeFrameIdPath(); + // Handle scroll events with debouncing and direction detection if ( event.type === EventType.IncrementalSnapshot && @@ -157,7 +257,10 @@ function startRecorder() { type: "RRWEB_EVENT", payload: { ...event, - data: roundedScrollData, // Use rounded coordinates + data: roundedScrollData, + frameUrl, + frameIdPath, + isTopFrame, }, }); lastDirection = currentDirection; @@ -178,15 +281,18 @@ function startRecorder() { type: "RRWEB_EVENT", payload: { ...event, - data: roundedScrollData, // Use rounded coordinates + data: roundedScrollData, + frameUrl, + frameIdPath, + isTopFrame, }, }); scrollTimeout = null; lastDirection = null; // Reset direction for next scroll }, DEBOUNCE_MS); } else { - // Pass through non-scroll events unchanged - chrome.runtime.sendMessage({ type: "RRWEB_EVENT", payload: event }); + // Pass through non-scroll events unchanged, but include frame context for filtering in background + chrome.runtime.sendMessage({ type: "RRWEB_EVENT", payload: { ...event, frameUrl, frameIdPath, isTopFrame } }); } }, maskInputOptions: { @@ -234,462 +340,77 @@ function stopRecorder() { } } -// --- Helper function to extract semantic information --- -function extractSemanticInfo(element: HTMLElement) { - // Get associated label text using multiple strategies - let labelText = ''; - const elementType = (element as any).type?.toLowerCase() || ''; - const elementTag = element.tagName.toLowerCase(); - - // Special handling for radio buttons and checkboxes - if ((elementTag === 'input' && (elementType === 'radio' || elementType === 'checkbox')) || - (elementTag === 'button' && element.getAttribute('role') === 'radio')) { - - let fieldName = ''; // The group/field name (e.g., "Marital Status") - let optionValue = ''; // The specific option (e.g., "Married") - let allOptions: string[] = []; // All possible values in the group - - // First, get the individual option value/label - // Strategy 1: Direct label[for="id"] association (most reliable for radio buttons) - if ((element as any).id) { - const label = document.querySelector(`label[for="${(element as any).id}"]`); - if (label) { - optionValue = label.textContent?.trim() || ''; - } - } - - // Strategy 2: Look for immediate parent label (common pattern) - if (!optionValue) { - const parent = element.parentElement; - if (parent && parent.tagName.toLowerCase() === 'label') { - optionValue = parent.textContent?.trim() || ''; - } - } - - // Strategy 3: Look for adjacent text nodes or spans (common in custom radio buttons) - if (!optionValue) { - // Check next sibling for text - let sibling = element.nextElementSibling; - while (sibling && !optionValue) { - const siblingText = sibling.textContent?.trim() || ''; - if (siblingText && siblingText.length < 50 && siblingText.length > 1) { - optionValue = siblingText; - break; - } - sibling = sibling.nextElementSibling; - } - - // If no next sibling, check previous sibling - if (!optionValue) { - sibling = element.previousElementSibling; - while (sibling && !optionValue) { - const siblingText = sibling.textContent?.trim() || ''; - if (siblingText && siblingText.length < 50 && siblingText.length > 1) { - optionValue = siblingText; - break; - } - sibling = sibling.previousElementSibling; - } - } - } - - // Strategy 4: Use value attribute for radio buttons if no label found - if (!optionValue && elementType === 'radio') { - const value = (element as any).value || ''; - if (value && value.length < 30) { - optionValue = value; - } - } - - // Now find the field name and all options for radio button groups - if (elementType === 'radio') { - const radioName = (element as any).name || ''; - - // Find the field group name by looking for fieldset legend or group labels - let container = element.parentElement; - while (container && container !== document.body) { - // Check for fieldset with legend - if (container.tagName.toLowerCase() === 'fieldset') { - const legend = container.querySelector('legend'); - if (legend) { - fieldName = legend.textContent?.trim() || ''; - break; - } - } - - // Check for group labels (like div with a label or heading) - const possibleLabels = container.querySelectorAll('label, h1, h2, h3, h4, h5, h6, .label, .form-label, .field-label'); - for (const possibleLabel of possibleLabels) { - const labelText = possibleLabel.textContent?.trim() || ''; - // Check if this label doesn't belong to a specific input (not associated with any radio button value) - const isGeneralLabel = !Array.from(container.querySelectorAll('input[type="radio"]')).some(radio => { - const radioValue = (radio as any).value || ''; - const radioLabel = radio.closest('label')?.textContent?.trim() || ''; - return labelText.includes(radioValue) || labelText.includes(radioLabel); - }); - - if (labelText && labelText.length > 2 && labelText.length < 100 && isGeneralLabel) { - fieldName = labelText; - break; - } - } - - if (fieldName) break; - container = container.parentElement; - } - - // Collect all options in the same radio group - if (radioName) { - const radioGroup = document.querySelectorAll(`input[type="radio"][name="${radioName}"]`); - radioGroup.forEach((radio) => { - // Get the label for each radio button - let radioOptionText = ''; - const radioId = (radio as any).id; - if (radioId) { - const radioLabel = document.querySelector(`label[for="${radioId}"]`); - if (radioLabel) { - radioOptionText = radioLabel.textContent?.trim() || ''; - } - } - - if (!radioOptionText) { - const radioParent = radio.parentElement; - if (radioParent && radioParent.tagName.toLowerCase() === 'label') { - radioOptionText = radioParent.textContent?.trim() || ''; - } - } - - if (!radioOptionText) { - radioOptionText = (radio as any).value || ''; - } - - if (radioOptionText && !allOptions.includes(radioOptionText)) { - allOptions.push(radioOptionText); - } - }); - } - } - - // Create meaningful labelText combining field name and option - if (fieldName && optionValue) { - labelText = `${fieldName}: ${optionValue}`; - } else if (optionValue) { - labelText = optionValue; - } else if (fieldName) { - labelText = fieldName; - } - - // Store additional radio button info for later use - (element as any)._radioButtonInfo = { - fieldName, - optionValue, - allOptions - }; - - // Fallback: Look in immediate parent container but filter out other radio button text - if (!labelText) { - const parent = element.parentElement; - if (parent) { - // Get all radio buttons in the same group - const radioButtons = parent.querySelectorAll('input[type="radio"], button[role="radio"]'); - const parentText = parent.textContent?.trim() || ''; - - if (parentText && parentText.length < 100) { - // Try to extract just this radio button's text by removing other radio button values - let cleanedText = parentText; - radioButtons.forEach((radio) => { - if (radio !== element) { - const radioValue = (radio as any).value || ''; - const radioText = radio.textContent?.trim() || ''; - if (radioValue) cleanedText = cleanedText.replace(radioValue, '').trim(); - if (radioText) cleanedText = cleanedText.replace(radioText, '').trim(); - } - }); - - if (cleanedText && cleanedText.length > 1 && cleanedText.length < 50) { - labelText = cleanedText; - } - } - } - } - } else { - // Standard label extraction for non-radio elements - - // Special handling for buttons - use direct text content - if (elementTag === 'button' || - (elementTag === 'input' && ['button', 'submit'].includes(elementType))) { - // For buttons, prioritize the element's own text content - labelText = element.textContent?.trim() || ''; - - // If no direct text, try aria-label or value - if (!labelText) { - labelText = element.getAttribute('aria-label') || - (element as any).value || - element.title || ''; - } - } else { - // Strategy 1: Direct label[for="id"] association - if ((element as any).id) { - const label = document.querySelector(`label[for="${(element as any).id}"]`); - if (label) { - labelText = label.textContent?.trim() || ''; - } - } - - // Strategy 2: Find parent label element - if (!labelText) { - let parent = element.parentElement; - while (parent && parent !== document.body) { - if (parent.tagName.toLowerCase() === 'label') { - labelText = parent.textContent?.trim() || ''; - break; - } - parent = parent.parentElement; - } - } - } - - // Strategy 3: Look for associated text in immediate siblings or parent containers - if (!labelText) { - const parent = element.parentElement; - if (parent) { - // Check for text in the same container - const parentText = parent.textContent?.trim() || ''; - // Extract meaningful text that's not just the element's own value/placeholder - const elementOwnText = ((element as any).value || (element as any).placeholder || element.textContent || '').trim(); - - if (parentText && parentText !== elementOwnText && parentText.length < 200) { - // Try to extract the label part by removing element's own text - let cleanedText = parentText; - if (elementOwnText) { - cleanedText = parentText.replace(elementOwnText, '').trim(); - } - - // If we have meaningful text, use it - if (cleanedText && cleanedText.length > 2) { - labelText = cleanedText; - } else if (parentText.length < 100) { - // Use the full parent text if it's reasonable length - labelText = parentText; - } - } - } - } - - // Strategy 4: Look for preceding text nodes or elements - if (!labelText) { - let sibling = element.previousElementSibling; - while (sibling && !labelText) { - const siblingText = sibling.textContent?.trim() || ''; - if (siblingText && siblingText.length < 100 && siblingText.length > 2) { - labelText = siblingText; - break; - } - sibling = sibling.previousElementSibling; - } - } - - // Strategy 5: Check aria-labelledby references - if (!labelText) { - const ariaLabelledBy = element.getAttribute('aria-labelledby'); - if (ariaLabelledBy) { - const referencedElement = document.getElementById(ariaLabelledBy); - if (referencedElement) { - labelText = referencedElement.textContent?.trim() || ''; - } - } - } - } - - // Get parent context for additional semantic information - let parentText = ''; - let parent = element.parentElement; - while (parent && !parentText && parent !== document.body) { - const text = parent.textContent?.trim() || ''; - if (text && text.length < 100) { - parentText = text; - } - parent = parent.parentElement; - } - - // Get radio button info if available - const radioButtonInfo = (element as any)._radioButtonInfo || null; - - return { - labelText, - textContent: element.textContent?.trim().slice(0, 200) || "", - placeholder: (element as any).placeholder || "", - title: element.title || "", - ariaLabel: element.getAttribute('aria-label') || "", - value: (element as any).value || "", - name: (element as any).name || "", - id: (element as any).id || "", - type: (element as any).type || "", - parentText, - // Radio button specific info - radioButtonInfo - }; -} - // --- Custom Click Handler --- function handleCustomClick(event: MouseEvent) { if (!isRecordingActive) return; const targetElement = event.target as HTMLElement; if (!targetElement) return; - + // Determine a frame identifier (best-effort). Top frame = 0, nested frames build path. + const frameIdPath = computeFrameIdPath(); try { const xpath = getXPath(targetElement); - const semanticInfo = extractSemanticInfo(targetElement); - - // Determine the best target_text for semantic targeting - // For buttons, prioritize direct text content over label text to avoid confusion - let targetText = ""; - if (targetElement.tagName.toLowerCase() === 'button' || - (targetElement.tagName.toLowerCase() === 'input' && - ['button', 'submit'].includes((targetElement as any).type?.toLowerCase()))) { - // For buttons, use the most specific text available - targetText = semanticInfo.textContent?.trim() || - semanticInfo.ariaLabel || - (targetElement as any).value || - semanticInfo.title || - ""; - } else { - // For other elements, use the standard priority order - targetText = semanticInfo.labelText || - semanticInfo.textContent || - semanticInfo.placeholder || - semanticInfo.ariaLabel || - semanticInfo.name || - semanticInfo.id || - ""; - } - - // Smart filtering: Skip capturing clicks on elements that are likely redundant - if (shouldSkipClickEvent(targetElement, semanticInfo, targetText)) { - console.log("Skipping redundant click event on:", targetElement.tagName, targetText); - return; - } - - // Capture element state information for inputs - const elementType = (targetElement as any).type?.toLowerCase() || ''; - let elementState = {}; - - if (targetElement.tagName.toLowerCase() === 'input') { - if (elementType === 'checkbox') { - elementState = { - checked: (targetElement as HTMLInputElement).checked, - elementType: 'checkbox' - }; - } else if (elementType === 'radio') { - elementState = { - checked: (targetElement as HTMLInputElement).checked, - elementType: 'radio' - }; - } else { - elementState = { - elementType: elementType - }; - } - } - const clickData = { timestamp: Date.now(), - url: document.location.href, // Use document.location for main page URL - frameUrl: window.location.href, // URL of the frame where the event occurred - xpath: xpath, + url: document.location.href, + frameUrl: window.location.href, + frameIdPath, + xpath, cssSelector: getEnhancedCSSSelector(targetElement, xpath), elementTag: targetElement.tagName, - elementText: semanticInfo.textContent, - elementType: elementType, // Add element type for processing - ...elementState, // Spread element state (checked status for checkboxes/radios) - // Semantic information for target_text based workflows - targetText: targetText, - semanticInfo: semanticInfo, - // Enhanced radio button information - radioButtonInfo: semanticInfo.radioButtonInfo, + elementText: targetElement.textContent?.trim().slice(0, 200) || "", + targetText: computeTargetText(targetElement), }; - console.log("Sending CUSTOM_CLICK_EVENT:", clickData); - chrome.runtime.sendMessage({ - type: "CUSTOM_CLICK_EVENT", - payload: clickData, - }); - } catch (error) { - console.error("Error capturing click data:", error); - } -} - -// Helper function to determine if we should skip capturing this click event -function shouldSkipClickEvent(element: HTMLElement, semanticInfo: any, targetText: string): boolean { - const tagName = element.tagName.toLowerCase(); - const elementType = (element as any).type?.toLowerCase() || ''; - - // Skip hidden input elements (they often fire alongside visible elements) - if (tagName === 'input' && elementType === 'radio' && !isElementVisible(element)) { - return true; - } - - // Skip button elements that have no meaningful text and are likely part of a composite component - if (tagName === 'button' && - element.getAttribute('role') === 'radio' && - !targetText.trim()) { - return true; - } - - // Skip clicks on elements that have no semantic value and are very generic - if (!targetText.trim() && - tagName === 'input' && - elementType === 'radio' && - element.style.display === 'none') { - return true; - } - - return false; -} - -// Helper function to check if an element is visible -function isElementVisible(element: HTMLElement): boolean { - const style = window.getComputedStyle(element); - return style.display !== 'none' && - style.visibility !== 'hidden' && - style.opacity !== '0' && - element.offsetWidth > 0 && - element.offsetHeight > 0; + chrome.runtime.sendMessage({ type: "CUSTOM_CLICK_EVENT", payload: clickData }); + } catch (error) { console.error("Error capturing click data:", error); } } // --- End Custom Click Handler --- // --- Custom Input Handler --- +// Maintain last recorded value & timestamp per element (keyed by xpath) to suppress noisy repeats +const lastInputRecord: Record = {}; function handleInput(event: Event) { if (!isRecordingActive) return; const targetElement = event.target as HTMLInputElement | HTMLTextAreaElement; if (!targetElement || !("value" in targetElement)) return; const isPassword = targetElement.type === "password"; + // Ignore programmatic (non user-trusted) input events – these often cause massive duplication + if (!(event as InputEvent).isTrusted) return; + + const frameIdPath = computeFrameIdPath(); try { const xpath = getXPath(targetElement); - const semanticInfo = extractSemanticInfo(targetElement as HTMLElement); - - // Determine the best target_text for semantic targeting - // For inputs, prioritize labelText and placeholder over textContent - const targetText = semanticInfo.labelText || - semanticInfo.placeholder || - semanticInfo.ariaLabel || - semanticInfo.name || - semanticInfo.id || - ""; - const inputData = { timestamp: Date.now(), url: document.location.href, frameUrl: window.location.href, + frameIdPath, xpath: xpath, cssSelector: getEnhancedCSSSelector(targetElement, xpath), elementTag: targetElement.tagName, value: isPassword ? "********" : targetElement.value, - inputType: (targetElement as any).type?.toLowerCase() || 'text', // Input type (text, password, email, etc.) - // Semantic information for target_text based workflows - targetText: targetText, - semanticInfo: semanticInfo, + targetText: computeTargetText(targetElement), }; + + // Dedupe rule 1: If value unchanged for this element and within debounce window, skip + const DEBOUNCE_MS_INPUT = 1500; + const prev = lastInputRecord[xpath]; + if (prev && prev.value === inputData.value && inputData.timestamp - prev.ts < DEBOUNCE_MS_INPUT) { + return; // Suppress noisy duplicate + } + + // Dedupe rule 2: If value is empty string and we already recorded empty in last 5s, suppress further empties + if ( + inputData.value === "" && + prev && + prev.value === "" && + inputData.timestamp - prev.ts < 5000 + ) { + return; + } + + // Store/update last record metadata + lastInputRecord[xpath] = { value: inputData.value, ts: inputData.timestamp }; console.log("Sending CUSTOM_INPUT_EVENT:", inputData); chrome.runtime.sendMessage({ type: "CUSTOM_INPUT_EVENT", @@ -707,39 +428,22 @@ function handleSelectChange(event: Event) { const targetElement = event.target as HTMLSelectElement; // Ensure it's a select element if (!targetElement || targetElement.tagName !== "SELECT") return; + const frameIdPath = computeFrameIdPath(); try { const xpath = getXPath(targetElement); const selectedOption = targetElement.options[targetElement.selectedIndex]; - - // Extract all available options - const allOptions: Array<{text: string, value: string}> = []; - for (let i = 0; i < targetElement.options.length; i++) { - const option = targetElement.options[i]; - allOptions.push({ - text: option.text.trim(), - value: option.value - }); - } - - // Get semantic info for the select element - const semanticInfo = extractSemanticInfo(targetElement); - const fieldName = semanticInfo.labelText || semanticInfo.name || - semanticInfo.ariaLabel || targetElement.name || ''; - const selectData = { timestamp: Date.now(), url: document.location.href, frameUrl: window.location.href, + frameIdPath, xpath: xpath, cssSelector: getEnhancedCSSSelector(targetElement, xpath), elementTag: targetElement.tagName, selectedValue: targetElement.value, selectedText: selectedOption ? selectedOption.text : "", // Get selected option text - allOptions: allOptions, // Include all available options - fieldName: fieldName, // Field name/label - targetText: semanticInfo.labelText || fieldName, - semanticInfo: semanticInfo + targetText: computeTargetText(targetElement), }; console.log("Sending CUSTOM_SELECT_EVENT:", selectData); chrome.runtime.sendMessage({ @@ -807,11 +511,13 @@ function handleKeydown(event: KeyboardEvent) { } } + const frameIdPath = computeFrameIdPath(); try { const keyData = { timestamp: Date.now(), url: document.location.href, frameUrl: window.location.href, + frameIdPath, key: keyToLog, // The key or combination pressed xpath: xpath, // XPath of the element in focus (if any) cssSelector: cssSelector, // CSS selector of the element in focus (if any) @@ -974,6 +680,9 @@ function handleBlur(event: FocusEvent) { export default defineContentScript({ matches: [""], + // Ensure injection into all frames (iframes) so we can capture interactions inside nested documents. + allFrames: true, + matchAboutBlank: true, main(ctx) { // Listener for status updates from the background script chrome.runtime.onMessage.addListener((message, sender, sendResponse) => { @@ -1043,3 +752,4 @@ export default defineContentScript({ // }); }, }); + diff --git a/extension/src/entrypoints/options.html b/extension/src/entrypoints/options.html new file mode 100644 index 0000000..dfb7bf4 --- /dev/null +++ b/extension/src/entrypoints/options.html @@ -0,0 +1,80 @@ + + + + + Workflow Use - Options + + + +

Recording Settings

+
+
+ +
When disabled, iframe-originated navigation/meta events are ignored.
+
+
+ + +
Time after a user interaction in an iframe during which rrweb meta navigations are allowed.
+
+
+ + +
+
+ + +
+ + +
+ + + + diff --git a/extension/src/entrypoints/sidepanel/components/recording-view.tsx b/extension/src/entrypoints/sidepanel/components/recording-view.tsx index 00ebfd8..1aa8a0d 100644 --- a/extension/src/entrypoints/sidepanel/components/recording-view.tsx +++ b/extension/src/entrypoints/sidepanel/components/recording-view.tsx @@ -1,193 +1,30 @@ -import React, { useState } from "react"; +import React from "react"; import { useWorkflow } from "../context/workflow-provider"; import { Button } from "@/components/ui/button"; -import { EventViewer } from "./event-viewer"; // Import EventViewer +import { EventViewer } from "./event-viewer"; export const RecordingView: React.FC = () => { - const { stopRecording, workflow, recordingStatus } = useWorkflow(); + const { stopRecording, workflow } = useWorkflow(); const stepCount = workflow?.steps?.length || 0; - const [showExtractionDialog, setShowExtractionDialog] = useState(false); - const [extractionGoal, setExtractionGoal] = useState(""); - - const handleAddExtraction = () => { - if (extractionGoal.trim()) { - const payload = { - extractionGoal: extractionGoal.trim(), - timestamp: Date.now(), - }; - - console.log("πŸ€– Sending extraction step request:", payload); - console.log("πŸ“Š Current workflow stats:", stats); - console.log("πŸ“ Current workflow:", workflow); - console.log("πŸ”΄ Recording status:", recordingStatus); - - // Set up a timeout to handle potential message port issues - let timeoutId: NodeJS.Timeout; - let responseReceived = false; - - const timeoutPromise = new Promise((_, reject) => { - timeoutId = setTimeout(() => { - if (!responseReceived) { - reject(new Error("Request timeout - no response received within 5 seconds")); - } - }, 5000); - }); - - // Send extraction step to background script - chrome.runtime.sendMessage({ - type: "ADD_EXTRACTION_STEP", - payload: payload - }, (response) => { - responseReceived = true; - clearTimeout(timeoutId); - - console.log("πŸ“¨ Extraction step response:", response); - - if (chrome.runtime.lastError) { - console.error("❌ Chrome runtime error:", chrome.runtime.lastError); - alert(`Chrome runtime error: ${chrome.runtime.lastError.message}\n\nTry reloading the extension and starting a new recording.`); - return; - } - - if (response?.status === "added") { - console.log("βœ… Extraction step added successfully"); - setExtractionGoal(""); - setShowExtractionDialog(false); - } else { - console.error("❌ Failed to add extraction step:", response); - const errorMessage = response?.message || 'Unknown error'; - alert(`Failed to add extraction step: ${errorMessage}`); - } - }); - - // Handle timeout case - timeoutPromise.catch((error) => { - if (!responseReceived) { - console.error("❌ Request timeout:", error); - alert("Request timed out. Please try again or reload the extension."); - } - }); - - } else { - console.warn("⚠️ Extraction goal is empty"); - } - }; - - // Get workflow stats - const stats = React.useMemo(() => { - if (!workflow?.steps) return { actions: 0, extractions: 0, navigations: 0 }; - - const actions = workflow.steps.filter(s => ['click', 'input', 'key_press'].includes(s.type)).length; - const extractions = workflow.steps.filter(s => (s as any).type === 'extract').length; - const navigations = workflow.steps.filter(s => s.type === 'navigation').length; - - return { actions, extractions, navigations }; - }, [workflow?.steps]); return ( -
- {/* Header */} -
-
+
+
+
-
- - Recording ({stepCount} steps) - -
- {stats.actions} actions β€’ {stats.navigations} nav β€’ {stats.extractions} AI extractions -
- {/* Debug status */} -
- Status: {recordingStatus} -
-
+ + Recording ({stepCount} steps) +
-
- -
- - {/* Extraction Dialog */} - {showExtractionDialog && ( -
-
-

Add AI Extraction Step

-

- Describe what information you want to extract from the current page: -

-