diff --git a/.changeset/open-donkeys-shine.md b/.changeset/open-donkeys-shine.md new file mode 100644 index 000000000..23ce9a0bc --- /dev/null +++ b/.changeset/open-donkeys-shine.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +enable scrolling inside of iframes diff --git a/evals/evals.config.json b/evals/evals.config.json index 9d3c7ccbe..126cb0946 100644 --- a/evals/evals.config.json +++ b/evals/evals.config.json @@ -419,6 +419,10 @@ "name": "agent/sign_in", "categories": ["agent"] }, + { + "name": "iframe_scroll", + "categories": ["act"] + }, { "name": "namespace_xpath", "categories": ["act"] diff --git a/evals/tasks/iframe_scroll.ts b/evals/tasks/iframe_scroll.ts new file mode 100644 index 000000000..8091c087d --- /dev/null +++ b/evals/tasks/iframe_scroll.ts @@ -0,0 +1,60 @@ +import { EvalFunction } from "@/types/evals"; + +export const iframe_scroll: EvalFunction = async ({ + debugUrl, + sessionUrl, + stagehand, + logger, +}) => { + try { + await stagehand.page.goto( + "https://browserbase.github.io/stagehand-eval-sites/sites/iframe-same-proc-scroll/", + ); + await stagehand.page.act({ + action: "scroll down 50% inside the iframe", + iframes: true, + }); + + const frames = stagehand.page.frames(); + const frame = frames[1]; + + await new Promise((resolve) => setTimeout(resolve, 5000)); + + // Get the current scroll position and total scroll height + const scrollInfo = await frame.evaluate(() => { + return { + scrollTop: window.scrollY + window.innerHeight / 2, + scrollHeight: document.documentElement.scrollHeight, + }; + }); + + const halfwayScroll = scrollInfo.scrollHeight / 2; + const halfwayReached = Math.abs(scrollInfo.scrollTop - halfwayScroll) <= 1; + const evaluationResult = halfwayReached + ? { + _success: true, + logs: logger.getLogs(), + debugUrl, + sessionUrl, + } + : { + _success: false, + logs: logger.getLogs(), + debugUrl, + sessionUrl, + message: `Scroll position (${scrollInfo.scrollTop}px) is not halfway down the page (${halfwayScroll}px).`, + }; + + return evaluationResult; + } catch (error) { + return { + _success: false, + error: error, + logs: logger.getLogs(), + debugUrl, + sessionUrl, + }; + } finally { + await stagehand.close(); + } +}; diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index 6c77af94e..06dc8c4b3 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -30,6 +30,8 @@ const PUA_END = 0xf8ff; const NBSP_CHARS = new Set([0x00a0, 0x202f, 0x2007, 0xfeff]); +const WORLD_NAME = "stagehand-world"; + /** * Clean a string by removing private-use unicode characters, normalizing whitespace, * and trimming the result. @@ -1045,6 +1047,8 @@ export async function resolveObjectIdForXPath( xpath: string, targetFrame?: Frame, ): Promise { + const contextId = await getFrameExecutionContextId(page, targetFrame); + const { result } = await page.sendCDP<{ result?: { objectId?: string }; }>( @@ -1063,6 +1067,7 @@ export async function resolveObjectIdForXPath( })(); `, returnByValue: false, + ...(contextId !== undefined ? { contextId } : {}), }, targetFrame, ); @@ -1070,6 +1075,34 @@ export async function resolveObjectIdForXPath( return result.objectId; } +/** + * Returns a stable executionContextId for the given frame by creating (or reusing) + * an isolated world in that frame. + */ +async function getFrameExecutionContextId( + stagehandPage: StagehandPage, + frame: Frame, +): Promise { + if (!frame || frame === stagehandPage.page.mainFrame()) { + // Main frame (or no frame): use the default world. + return undefined; + } + const frameId: string = await getCDPFrameId(stagehandPage, frame); + const { executionContextId } = await stagehandPage.sendCDP<{ + executionContextId: number; + }>( + "Page.createIsolatedWorld", + { + frameId, + worldName: WORLD_NAME, + grantUniversalAccess: true, + }, + frame, + ); + + return executionContextId; +} + /** * Collapse consecutive whitespace characters (spaces, tabs, newlines, carriage returns) * into single ASCII spaces. diff --git a/lib/handlers/handlerUtils/actHandlerUtils.ts b/lib/handlers/handlerUtils/actHandlerUtils.ts index 1c32cf7f9..de34519c0 100644 --- a/lib/handlers/handlerUtils/actHandlerUtils.ts +++ b/lib/handlers/handlerUtils/actHandlerUtils.ts @@ -1,7 +1,6 @@ import { Page, Locator, FrameLocator } from "playwright"; import { PlaywrightCommandException } from "../../../types/playwright"; import { StagehandPage } from "../../StagehandPage"; -import { getNodeFromXpath } from "@/lib/dom/utils"; import { Logger } from "../../../types/log"; import { MethodHandlerContext } from "@/types/act"; import { StagehandClickError } from "@/types/stagehandErrors"; @@ -59,7 +58,7 @@ export const methodHandlerMap: Record< }; export async function scrollToNextChunk(ctx: MethodHandlerContext) { - const { stagehandPage, xpath, logger } = ctx; + const { locator, logger, xpath } = ctx; logger({ category: "action", @@ -71,40 +70,45 @@ export async function scrollToNextChunk(ctx: MethodHandlerContext) { }); try { - await stagehandPage.page.evaluate( - ({ xpath }) => { - const elementNode = getNodeFromXpath(xpath); - if (!elementNode || elementNode.nodeType !== Node.ELEMENT_NODE) { - throw Error(`Could not locate element to scroll on.`); - } + await locator.evaluate( + (element) => { + const waitForScrollEnd = (el: HTMLElement | Element) => + new Promise((resolve) => { + let last = el.scrollTop ?? 0; + const check = () => { + const cur = el.scrollTop ?? 0; + if (cur === last) return resolve(); + last = cur; + requestAnimationFrame(check); + }; + requestAnimationFrame(check); + }); - const element = elementNode as HTMLElement; const tagName = element.tagName.toLowerCase(); - let height: number; if (tagName === "html" || tagName === "body") { - height = window.visualViewport.height; - window.scrollBy({ - top: height, - left: 0, - behavior: "smooth", - }); + const height = window.visualViewport?.height ?? window.innerHeight; - const scrollingEl = - document.scrollingElement || document.documentElement; - return window.waitForElementScrollEnd(scrollingEl as HTMLElement); - } else { - height = element.getBoundingClientRect().height; - element.scrollBy({ - top: height, - left: 0, - behavior: "smooth", - }); + window.scrollBy({ top: height, left: 0, behavior: "smooth" }); + + const scrollingRoot = (document.scrollingElement ?? + document.documentElement) as HTMLElement; - return window.waitForElementScrollEnd(element); + return waitForScrollEnd(scrollingRoot); } + + const height = (element as HTMLElement).getBoundingClientRect().height; + + (element as HTMLElement).scrollBy({ + top: height, + left: 0, + behavior: "smooth", + }); + + return waitForScrollEnd(element); }, - { xpath }, + undefined, + { timeout: 10_000 }, ); } catch (e) { logger({ @@ -122,7 +126,7 @@ export async function scrollToNextChunk(ctx: MethodHandlerContext) { } export async function scrollToPreviousChunk(ctx: MethodHandlerContext) { - const { stagehandPage, xpath, logger } = ctx; + const { locator, logger, xpath } = ctx; logger({ category: "action", @@ -134,39 +138,41 @@ export async function scrollToPreviousChunk(ctx: MethodHandlerContext) { }); try { - await stagehandPage.page.evaluate( - ({ xpath }) => { - const elementNode = getNodeFromXpath(xpath); - if (!elementNode || elementNode.nodeType !== Node.ELEMENT_NODE) { - throw Error(`Could not locate element to scroll on.`); - } + await locator.evaluate( + (element) => { + const waitForScrollEnd = (el: HTMLElement | Element) => + new Promise((resolve) => { + let last = el.scrollTop ?? 0; + const check = () => { + const cur = el.scrollTop ?? 0; + if (cur === last) return resolve(); + last = cur; + requestAnimationFrame(check); + }; + requestAnimationFrame(check); + }); - const element = elementNode as HTMLElement; const tagName = element.tagName.toLowerCase(); - let height: number; if (tagName === "html" || tagName === "body") { - height = window.visualViewport.height; - window.scrollBy({ - top: -height, - left: 0, - behavior: "smooth", - }); + const height = window.visualViewport?.height ?? window.innerHeight; + window.scrollBy({ top: -height, left: 0, behavior: "smooth" }); - const scrollingEl = - document.scrollingElement || document.documentElement; - return window.waitForElementScrollEnd(scrollingEl as HTMLElement); - } else { - height = element.getBoundingClientRect().height; - element.scrollBy({ - top: -height, - left: 0, - behavior: "smooth", - }); - return window.waitForElementScrollEnd(element); + const rootScrollingEl = (document.scrollingElement ?? + document.documentElement) as HTMLElement; + + return waitForScrollEnd(rootScrollingEl); } + const height = (element as HTMLElement).getBoundingClientRect().height; + (element as HTMLElement).scrollBy({ + top: -height, + left: 0, + behavior: "smooth", + }); + return waitForScrollEnd(element); }, - { xpath }, + undefined, + { timeout: 10_000 }, ); } catch (e) { logger({ @@ -215,7 +221,7 @@ export async function scrollElementIntoView(ctx: MethodHandlerContext) { } export async function scrollElementToPercentage(ctx: MethodHandlerContext) { - const { args, stagehandPage, xpath, logger } = ctx; + const { args, xpath, logger, locator } = ctx; logger({ category: "action", @@ -230,20 +236,14 @@ export async function scrollElementToPercentage(ctx: MethodHandlerContext) { try { const [yArg = "0%"] = args as string[]; - await stagehandPage.page.evaluate( - ({ xpath, yArg }) => { + await locator.evaluate( + (element, { yArg }) => { function parsePercent(val: string): number { const cleaned = val.trim().replace("%", ""); const num = parseFloat(cleaned); return Number.isNaN(num) ? 0 : Math.max(0, Math.min(num, 100)); } - const elementNode = getNodeFromXpath(xpath); - if (!elementNode || elementNode.nodeType !== Node.ELEMENT_NODE) { - throw Error(`Could not locate element to scroll on.`); - } - - const element = elementNode as HTMLElement; const yPct = parsePercent(yArg); if (element.tagName.toLowerCase() === "html") { @@ -266,7 +266,8 @@ export async function scrollElementToPercentage(ctx: MethodHandlerContext) { }); } }, - { xpath, yArg }, + { yArg }, + { timeout: 10_000 }, ); } catch (e) { logger({