diff --git a/.changeset/calm-snails-carry.md b/.changeset/calm-snails-carry.md new file mode 100644 index 000000000..20abadf89 --- /dev/null +++ b/.changeset/calm-snails-carry.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +add support for shadow DOMs (open & closed mode) when experimental: true diff --git a/evals/evals.config.json b/evals/evals.config.json index 126cb0946..92472be4b 100644 --- a/evals/evals.config.json +++ b/evals/evals.config.json @@ -420,12 +420,44 @@ "categories": ["agent"] }, { - "name": "iframe_scroll", + "name": "osr_in_oopif", + "categories": ["act"] + }, + { + "name": "csr_in_oopif", + "categories": ["act"] + }, + { + "name": "csr_in_spif", + "categories": ["act"] + }, + { + "name": "spif_in_osr", + "categories": ["act"] + }, + { + "name": "oopif_in_osr", + "categories": ["act"] + }, + { + "name": "spif_in_csr", + "categories": ["act"] + }, + { + "name": "oopif_in_csr", + "categories": ["act"] + }, + { + "name": "osr_in_spif", "categories": ["act"] }, { "name": "namespace_xpath", "categories": ["act"] + }, + { + "name": "iframe_scroll", + "categories": ["act"] } ] } diff --git a/evals/tasks/csr_in_oopif.ts b/evals/tasks/csr_in_oopif.ts new file mode 100644 index 000000000..e034e8906 --- /dev/null +++ b/evals/tasks/csr_in_oopif.ts @@ -0,0 +1,54 @@ +import { EvalFunction } from "@/types/evals"; + +export const csr_in_oopif: EvalFunction = async ({ + debugUrl, + sessionUrl, + stagehand, + logger, +}) => { + // this eval is designed to test whether stagehand can successfully + // click inside a CSR (closed mode shadow) root that is inside an + // OOPIF (out of process iframe) + + const page = stagehand.page; + try { + await page.goto( + "https://browserbase.github.io/stagehand-eval-sites/sites/closed-shadow-root-in-oopif/", + ); + await page.act({ action: "click the button", iframes: true }); + + const extraction = await page.extract({ + instruction: "extract the
entire page text", + iframes: true, + }); + + const pageText = extraction.extraction; + + if (pageText.includes("button successfully clicked")) { + return { + _success: true, + message: `successfully clicked the button`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + return { + _success: false, + message: `unable to click on the button`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } catch (error) { + return { + _success: false, + message: `error: ${error.message}`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } finally { + await stagehand.close(); + } +}; diff --git a/evals/tasks/csr_in_spif.ts b/evals/tasks/csr_in_spif.ts new file mode 100644 index 000000000..f74f45d14 --- /dev/null +++ b/evals/tasks/csr_in_spif.ts @@ -0,0 +1,54 @@ +import { EvalFunction } from "@/types/evals"; + +export const csr_in_spif: EvalFunction = async ({ + debugUrl, + sessionUrl, + stagehand, + logger, +}) => { + // this eval is designed to test whether stagehand can successfully + // click inside an CSR (closed mode shadow) root that is inside an + // SPIF (same process iframe) + + const page = stagehand.page; + try { + await page.goto( + "https://browserbase.github.io/stagehand-eval-sites/sites/closed-shadow-dom-in-spif/", + ); + await page.act({ action: "click the button", iframes: true }); + + const extraction = await page.extract({ + instruction: "extract the entire page text", + iframes: true, + }); + + const pageText = extraction.extraction; + + if (pageText.includes("button successfully clicked")) { + return { + _success: true, + message: `successfully clicked the button`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + return { + _success: false, + message: `unable to click on the button`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } catch (error) { + return { + _success: false, + message: `error: ${error.message}`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } finally { + await 
stagehand.close(); + } +}; diff --git a/evals/tasks/oopif_in_csr.ts b/evals/tasks/oopif_in_csr.ts new file mode 100644 index 000000000..7badd3cd7 --- /dev/null +++ b/evals/tasks/oopif_in_csr.ts @@ -0,0 +1,57 @@ +import { EvalFunction } from "@/types/evals"; + +export const oopif_in_csr: EvalFunction = async ({ + debugUrl, + sessionUrl, + stagehand, + logger, +}) => { + // this eval is designed to test whether stagehand can successfully + // fill a form inside an OOPIF (out of process iframe) that is inside a + // CSR (closed mode shadow) root + + const page = stagehand.page; + try { + await page.goto( + "https://browserbase.github.io/stagehand-eval-sites/sites/oopif-in-closed-shadow-dom/", + ); + await page.act({ + action: "fill 'nunya' into the first name field", + iframes: true, + }); + + const extraction = await page.extract({ + instruction: "extract the entire page text", + iframes: true, + }); + + const pageText = extraction.extraction; + + if (pageText.includes("nunya")) { + return { + _success: true, + message: `successfully filled the form`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + return { + _success: false, + message: `unable to fill the form`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } catch (error) { + return { + _success: false, + message: `error: ${error.message}`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } finally { + await stagehand.close(); + } +}; diff --git a/evals/tasks/oopif_in_osr.ts b/evals/tasks/oopif_in_osr.ts new file mode 100644 index 000000000..32385b14a --- /dev/null +++ b/evals/tasks/oopif_in_osr.ts @@ -0,0 +1,57 @@ +import { EvalFunction } from "@/types/evals"; + +export const oopif_in_osr: EvalFunction = async ({ + debugUrl, + sessionUrl, + stagehand, + logger, +}) => { + // this eval is designed to test whether stagehand can successfully + // fill a form inside an OOPIF (out of process iframe) that is inside an + // OSR (open mode shadow) root + + const page = stagehand.page; +
try { + await page.goto( + "https://browserbase.github.io/stagehand-eval-sites/sites/oopif-in-open-shadow-dom/", + ); + await page.act({ + action: "fill 'nunya' into the first name field", + iframes: true, + }); + + const extraction = await page.extract({ + instruction: "extract the entire page text", + iframes: true, + }); + + const pageText = extraction.extraction; + + if (pageText.includes("nunya")) { + return { + _success: true, + message: `successfully filled the form`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + return { + _success: false, + message: `unable to fill the form`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } catch (error) { + return { + _success: false, + message: `error: ${error.message}`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } finally { + await stagehand.close(); + } +}; diff --git a/evals/tasks/osr_in_oopif.ts b/evals/tasks/osr_in_oopif.ts new file mode 100644 index 000000000..47ae0abd2 --- /dev/null +++ b/evals/tasks/osr_in_oopif.ts @@ -0,0 +1,54 @@ +import { EvalFunction } from "@/types/evals"; + +export const osr_in_oopif: EvalFunction = async ({ + debugUrl, + sessionUrl, + stagehand, + logger, +}) => { + // this eval is designed to test whether stagehand can successfully + // click inside an OSR (open mode shadow) root that is inside an + // OOPIF (out of process iframe) + + const page = stagehand.page; + try { + await page.goto( + "https://browserbase.github.io/stagehand-eval-sites/sites/open-shadow-root-in-oopif/", + ); + await page.act({ action: "click the button", iframes: true }); + + const extraction = await page.extract({ + instruction: "extract the entire page text", + iframes: true, + }); + + const pageText = extraction.extraction; + + if (pageText.includes("button successfully clicked")) { + return { + _success: true, + message: `successfully clicked the button`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + return { + _success: false, + message: `unable to 
click on the button`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } catch (error) { + return { + _success: false, + message: `error: ${error.message}`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } finally { + await stagehand.close(); + } +}; diff --git a/evals/tasks/osr_in_spif.ts b/evals/tasks/osr_in_spif.ts new file mode 100644 index 000000000..86cb1bec5 --- /dev/null +++ b/evals/tasks/osr_in_spif.ts @@ -0,0 +1,54 @@ +import { EvalFunction } from "@/types/evals"; + +export const osr_in_spif: EvalFunction = async ({ + debugUrl, + sessionUrl, + stagehand, + logger, +}) => { + // this eval is designed to test whether stagehand can successfully + // click inside an OSR (open mode shadow) root that is inside an + // SPIF (same process iframe) + + const page = stagehand.page; + try { + await page.goto( + "https://browserbase.github.io/stagehand-eval-sites/sites/open-shadow-root-in-spif/", + ); + await page.act({ action: "click the button", iframes: true }); + + const extraction = await page.extract({ + instruction: "extract the entire page text", + iframes: true, + }); + + const pageText = extraction.extraction; + + if (pageText.includes("button successfully clicked")) { + return { + _success: true, + message: `successfully clicked the button`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + return { + _success: false, + message: `unable to click on the button`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } catch (error) { + return { + _success: false, + message: `error: ${error.message}`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } finally { + await stagehand.close(); + } +}; diff --git a/evals/tasks/shadow_dom.ts b/evals/tasks/shadow_dom.ts index 8bedf474c..52cadb36c 100644 --- a/evals/tasks/shadow_dom.ts +++ b/evals/tasks/shadow_dom.ts @@ -11,9 +11,14 @@ export const shadow_dom: EvalFunction = async ({ await page.goto( "https://browserbase.github.io/stagehand-eval-sites/sites/shadow-dom/", 
); - const result = await page.act("click the button"); + await page.act("click the button"); + const extraction = await page.extract({ + instruction: "extract the page text", + }); - if (!result.success && result.message.includes("not-supported")) { + const pageText = extraction.extraction; + + if (pageText.includes("button successfully clicked")) { return { _success: true, debugUrl, diff --git a/evals/tasks/spif_in_csr.ts b/evals/tasks/spif_in_csr.ts new file mode 100644 index 000000000..b12ac63ae --- /dev/null +++ b/evals/tasks/spif_in_csr.ts @@ -0,0 +1,54 @@ +import { EvalFunction } from "@/types/evals"; + +export const spif_in_csr: EvalFunction = async ({ + debugUrl, + sessionUrl, + stagehand, + logger, +}) => { + // this eval is designed to test whether stagehand can successfully + // click inside a SPIF (same process iframe) that is inside an + // CSR (closed mode shadow) root + + const page = stagehand.page; + try { + await page.goto( + "https://browserbase.github.io/stagehand-eval-sites/sites/spif-in-closed-shadow-dom/", + ); + await page.act({ action: "click the button", iframes: true }); + + const extraction = await page.extract({ + instruction: "extract the entire page text", + iframes: true, + }); + + const pageText = extraction.extraction; + + if (pageText.includes("button successfully clicked")) { + return { + _success: true, + message: `successfully clicked the button`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + return { + _success: false, + message: `unable to click on the button`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } catch (error) { + return { + _success: false, + message: `error: ${error.message}`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } finally { + await stagehand.close(); + } +}; diff --git a/evals/tasks/spif_in_osr.ts b/evals/tasks/spif_in_osr.ts new file mode 100644 index 000000000..75cdbbed2 --- /dev/null +++ b/evals/tasks/spif_in_osr.ts @@ -0,0 +1,54 @@ +import { EvalFunction 
} from "@/types/evals"; + +export const spif_in_osr: EvalFunction = async ({ + debugUrl, + sessionUrl, + stagehand, + logger, +}) => { + // this eval is designed to test whether stagehand can successfully + // click inside a SPIF (same process iframe) that is inside an + // OSR (open mode shadow) root + + const page = stagehand.page; + try { + await page.goto( + "https://browserbase.github.io/stagehand-eval-sites/sites/spif-in-open-shadow-dom/", + ); + await page.act({ action: "click the button", iframes: true }); + + const extraction = await page.extract({ + instruction: "extract the entire page text", + iframes: true, + }); + + const pageText = extraction.extraction; + + if (pageText.includes("button successfully clicked")) { + return { + _success: true, + message: `successfully clicked the button`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } + return { + _success: false, + message: `unable to click on the button`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } catch (error) { + return { + _success: false, + message: `error: ${error.message}`, + debugUrl, + sessionUrl, + logs: logger.getLogs(), + }; + } finally { + await stagehand.close(); + } +}; diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts index 09827b40c..e4fd45dfb 100644 --- a/lib/StagehandPage.ts +++ b/lib/StagehandPage.ts @@ -1,4 +1,5 @@ import type { CDPSession, Page as PlaywrightPage, Frame } from "playwright"; +import { selectors } from "playwright"; import { z } from "zod/v3"; import { Page, defaultExtractSchema } from "../types/page"; import { @@ -37,6 +38,9 @@ async function getCurrentRootFrameId(session: CDPSession): Promise { return frameTree.frame.id; } +/** ensure we register the custom selector only once per process */ +let stagehandSelectorRegistered = false; + export class StagehandPage { private stagehand: Stagehand; private rawPage: PlaywrightPage; @@ -119,18 +123,21 @@ export class StagehandPage { logger: this.stagehand.logger, stagehandPage: this, 
selfHeal: this.stagehand.selfHeal, + experimental: this.stagehand.experimental, }); this.extractHandler = new StagehandExtractHandler({ stagehand: this.stagehand, logger: this.stagehand.logger, stagehandPage: this, userProvidedInstructions, + experimental: this.stagehand.experimental, }); this.observeHandler = new StagehandObserveHandler({ stagehand: this.stagehand, logger: this.stagehand.logger, stagehandPage: this, userProvidedInstructions, + experimental: this.stagehand.experimental, }); } } @@ -188,6 +195,113 @@ ${scriptContent} \ } } + /** Register the custom selector engine that pierces open/closed shadow roots. */ + private async ensureStagehandSelectorEngine(): Promise { + if (stagehandSelectorRegistered) return; + stagehandSelectorRegistered = true; + + await selectors.register("stagehand", () => { + type Backdoor = { + getClosedRoot?: (host: Element) => ShadowRoot | undefined; + }; + + function parseSelector(input: string): { name: string; value: string } { + // Accept either: "abc123" → uses DEFAULT_ATTR + // or explicitly: "data-__stagehand-id=abc123" + const raw = input.trim(); + const eq = raw.indexOf("="); + if (eq === -1) { + return { + name: "data-__stagehand-id", + value: raw.replace(/^["']|["']$/g, ""), + }; + } + const name = raw.slice(0, eq).trim(); + const value = raw + .slice(eq + 1) + .trim() + .replace(/^["']|["']$/g, ""); + return { name, value }; + } + + function pushChildren(node: Node, stack: Node[]): void { + if (node.nodeType === Node.DOCUMENT_NODE) { + const de = (node as Document).documentElement; + if (de) stack.push(de); + return; + } + + if (node.nodeType === Node.DOCUMENT_FRAGMENT_NODE) { + const frag = node as DocumentFragment; + const hc = frag.children as HTMLCollection | undefined; + if (hc && hc.length) { + for (let i = hc.length - 1; i >= 0; i--) + stack.push(hc[i] as Element); + } else { + const cn = frag.childNodes; + for (let i = cn.length - 1; i >= 0; i--) stack.push(cn[i]); + } + return; + } + + if (node.nodeType === 
Node.ELEMENT_NODE) { + const el = node as Element; + for (let i = el.children.length - 1; i >= 0; i--) + stack.push(el.children[i]); + } + } + + function* traverseAllTrees( + start: Node, + ): Generator { + const backdoor = window.__stagehand__ as Backdoor | undefined; + const stack: Node[] = []; + + if (start.nodeType === Node.DOCUMENT_NODE) { + const de = (start as Document).documentElement; + if (de) stack.push(de); + } else { + stack.push(start); + } + + while (stack.length) { + const node = stack.pop()!; + if (node.nodeType === Node.ELEMENT_NODE) { + const el = node as Element; + yield el; + + // open shadow + const open = el.shadowRoot as ShadowRoot | null; + if (open) stack.push(open); + + // closed shadow via backdoor + const closed = backdoor?.getClosedRoot?.(el); + if (closed) stack.push(closed); + } + pushChildren(node, stack); + } + } + + return { + query(root: Node, selector: string): Element | null { + const { name, value } = parseSelector(selector); + for (const el of traverseAllTrees(root)) { + if (el.getAttribute(name) === value) return el; + } + return null; + }, + queryAll(root: Node, selector: string): Element[] { + const { name, value } = parseSelector(selector); + const out: Element[] = []; + for (const el of traverseAllTrees(root)) { + if (el.getAttribute(name) === value) out.push(el); + } + return out; + }, + }; + }); + } + /** * Waits for a captcha to be solved when using Browserbase environment. 
* @@ -410,6 +524,10 @@ ${scriptContent} \ this.intContext.registerFrameId(rootId, this); this.intPage = new Proxy(page, handler) as unknown as Page; + + // Ensure backdoor and selector engine are ready up front + await this.ensureStagehandSelectorEngine(); + this.initialized = true; return this; } catch (err: unknown) { diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index 06dc8c4b3..49c9d8ab5 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -120,11 +120,13 @@ const lc = (raw: string): string => { /** * Build mappings from CDP backendNodeIds to HTML tag names and relative XPaths. * + * @param experimental - Whether to use experimental behaviour. * @param sp - The StagehandPage wrapper for Playwright and CDP calls. * @param targetFrame - Optional Playwright.Frame whose DOM subtree to map; defaults to main frame. * @returns A Promise resolving to BackendIdMaps containing tagNameMap and xpathMap. */ export async function buildBackendIdMaps( + experimental: boolean, sp: StagehandPage, targetFrame?: Frame, ): Promise { @@ -171,11 +173,22 @@ export async function buildBackendIdMaps( let iframeNode: DOMNode | undefined; const locate = (n: DOMNode): boolean => { - if (n.backendNodeId === backendNodeId) return (iframeNode = n), true; - return ( - (n.children?.some(locate) ?? false) || - (n.contentDocument ? locate(n.contentDocument) : false) - ); + if (experimental) { + if (n.backendNodeId === backendNodeId) { + iframeNode = n; + return true; + } + if (n.shadowRoots?.some(locate)) return true; + if (n.children?.some(locate)) return true; + if (n.contentDocument && locate(n.contentDocument)) return true; + return false; + } else { + if (n.backendNodeId === backendNodeId) return (iframeNode = n), true; + return ( + (n.children?.some(locate) ?? false) || + (n.contentDocument ? 
locate(n.contentDocument) : false) + ); + } }; if (!locate(root) || !iframeNode?.contentDocument) { @@ -197,6 +210,9 @@ export async function buildBackendIdMaps( const stack: StackEntry[] = [{ node: startNode, path: "", fid: rootFid }]; const seen = new Set(); + const joinStep = (base: string, step: string): string => + base.endsWith("//") ? `${base}${step}` : `${base}/${step}`; + while (stack.length) { const { node, path, fid } = stack.pop()!; @@ -214,6 +230,16 @@ export async function buildBackendIdMaps( stack.push({ node: node.contentDocument, path: "", fid: childFid }); } + if (node.shadowRoots?.length && experimental) { + for (const shadowRoot of node.shadowRoots) { + stack.push({ + node: shadowRoot, + path: `${path}//`, + fid, + }); + } + } + // push children const kids = node.children ?? []; if (kids.length) { @@ -241,7 +267,7 @@ export async function buildBackendIdMaps( for (let i = kids.length - 1; i >= 0; i--) { stack.push({ node: kids[i]!, - path: `${path}/${segs[i]}`, + path: joinStep(path, segs[i]!), fid, }); } @@ -493,6 +519,7 @@ export async function getCDPFrameId( * @returns A Promise resolving to a TreeResult with the hierarchical AX tree and related metadata. */ export async function getAccessibilityTree( + experimental: boolean, stagehandPage: StagehandPage, logger: (log: LogLine) => void, selector?: string, @@ -500,6 +527,7 @@ export async function getAccessibilityTree( ): Promise { // 0. DOM helpers (maps, xpath) const { tagNameMap, xpathMap } = await buildBackendIdMaps( + experimental, stagehandPage, targetFrame, ); @@ -708,6 +736,57 @@ export async function getFrameRootBackendNodeId( * @param frame - The Playwright.Frame whose iframe element to locate. * @returns A Promise resolving to the XPath of the iframe element, or "/" if no frame provided. 
*/ +export async function getFrameRootXpathWithShadow( + frame: Frame | undefined, +): Promise { + // Return root path when no frame context is provided + if (!frame) { + return "/"; + } + // Obtain the element handle of the iframe in the embedding document + const handle = await frame.frameElement(); + // Evaluate the element's absolute XPath within the page context + return handle.evaluate((node: Element) => { + function stepFor(el: Element): string { + const tag = el.tagName.toLowerCase(); + let i = 1; + for ( + let sib = el.previousElementSibling; + sib; + sib = sib.previousElementSibling + ) { + if (sib.tagName.toLowerCase() === tag) i++; + } + return `${tag}[${i}]`; + } + + const segs: string[] = []; + let el: Element | null = node; + + while (el) { + segs.unshift(stepFor(el)); + if (el.parentElement) { + el = el.parentElement; + continue; + } + + // top of this tree: check if we’re inside a shadow root + const root = el.getRootNode(); // Document or ShadowRoot + if ((root as ShadowRoot).host) { + // Insert a shadow hop marker so the final path contains “//” + segs.unshift(""); + el = (root as ShadowRoot).host; + continue; + } + + break; + } + + // Leading '/' + join; empty tokens become “//” between segments + return "/" + segs.join("/"); + }); +} + export async function getFrameRootXpath( frame: Frame | undefined, ): Promise { @@ -840,6 +919,7 @@ export function injectSubtrees( * @returns A Promise resolving to CombinedA11yResult with combined tree text, xpath map, and URL map. */ export async function getAccessibilityTreeWithFrames( + experimental: boolean, stagehandPage: StagehandPage, logger: (l: LogLine) => void, rootXPath?: string, @@ -887,6 +967,7 @@ export async function getAccessibilityTreeWithFrames( try { const res = await getAccessibilityTree( + experimental, stagehandPage, logger, selector, @@ -899,7 +980,13 @@ export async function getAccessibilityTreeWithFrames( ? 
null : await getFrameRootBackendNodeId(stagehandPage, frame); - const frameXpath = frame === main ? "/" : await getFrameRootXpath(frame); + let frameXpath; + if (experimental) { + frameXpath = + frame === main ? "/" : await getFrameRootXpathWithShadow(frame); + } else { + frameXpath = frame === main ? "/" : await getFrameRootXpath(frame); + } // Resolve the CDP frameId for this Playwright Frame (undefined for main) const frameId = await getCDPFrameId(stagehandPage, frame); diff --git a/lib/dom/global.d.ts b/lib/dom/global.d.ts index 863aeb77a..8425e29ba 100644 --- a/lib/dom/global.d.ts +++ b/lib/dom/global.d.ts @@ -1,4 +1,9 @@ -export {}; +export interface StagehandBackdoor { + /** Closed shadow-root accessors */ + getClosedRoot(host: Element): ShadowRoot | undefined; + queryClosed(host: Element, selector: string): Element[]; + xpathClosed(host: Element, xpath: string): Node[]; +} declare global { interface Window { __stagehandInjected?: boolean; @@ -8,5 +13,6 @@ declare global { getScrollableElementXpaths: (topN?: number) => Promise; getNodeFromXpath: (xpath: string) => Node | null; waitForElementScrollEnd: (element: HTMLElement) => Promise; + readonly __stagehand__?: StagehandBackdoor; } } diff --git a/lib/dom/process.ts b/lib/dom/process.ts index 1fce2b196..c0057486e 100644 --- a/lib/dom/process.ts +++ b/lib/dom/process.ts @@ -73,6 +73,68 @@ export async function getScrollableElementXpaths( return xpaths; } +(() => { + // Map for every root created in closed mode + const closedRoots: WeakMap = new WeakMap(); + + // Preserve the original method + const nativeAttachShadow = Element.prototype.attachShadow; + + // Intercept *before any page script runs* + Element.prototype.attachShadow = function (init: ShadowRootInit): ShadowRoot { + const root = nativeAttachShadow.call(this, init); + if (init.mode === "closed") closedRoots.set(this, root); + return root; + }; + + interface StagehandBackdoor { + /** Get the real ShadowRoot (undefined if host has none / is open) */ 
+ getClosedRoot(host: Element): ShadowRoot | undefined; + + /** CSS‑selector search inside that root */ + queryClosed(host: Element, selector: string): Element[]; + + /** XPath search inside that root (relative XPath supported) */ + xpathClosed(host: Element, xpath: string): Node[]; + } + + const backdoor: StagehandBackdoor = { + getClosedRoot: (host) => closedRoots.get(host), + + queryClosed: (host, selector) => { + const root = closedRoots.get(host); + return root ? Array.from(root.querySelectorAll(selector)) : []; + }, + + xpathClosed: (host, xp) => { + const root = closedRoots.get(host); + if (!root) return []; + const it = document.evaluate( + xp, + root, + null, + XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, + null, + ); + const out: Node[] = []; + for (let i = 0; i < it.snapshotLength; ++i) { + const n = it.snapshotItem(i); + if (n) out.push(n); + } + return out; + }, + }; + + if (!("__stagehand__" in window)) { + Object.defineProperty(window, "__stagehand__", { + value: backdoor, + enumerable: false, + writable: false, + configurable: false, + }); + } +})(); + window.getScrollableElementXpaths = getScrollableElementXpaths; window.getNodeFromXpath = getNodeFromXpath; window.waitForElementScrollEnd = waitForElementScrollEnd; diff --git a/lib/handlers/actHandler.ts b/lib/handlers/actHandler.ts index ba260b5d9..91893b243 100644 --- a/lib/handlers/actHandler.ts +++ b/lib/handlers/actHandler.ts @@ -18,6 +18,7 @@ import { methodHandlerMap, fallbackLocatorMethod, deepLocator, + deepLocatorWithShadow, } from "./handlerUtils/actHandlerUtils"; import { StagehandObserveHandler } from "@/lib/handlers/observeHandler"; import { StagehandInvalidArgumentError } from "@/types/stagehandErrors"; @@ -30,19 +31,23 @@ export class StagehandActHandler { private readonly stagehandPage: StagehandPage; private readonly logger: (logLine: LogLine) => void; private readonly selfHeal: boolean; + private readonly experimental: boolean; constructor({ logger, stagehandPage, selfHeal, + 
experimental, }: { logger: (logLine: LogLine) => void; stagehandPage: StagehandPage; selfHeal: boolean; + experimental: boolean; }) { this.logger = logger; this.stagehandPage = stagehandPage; this.selfHeal = selfHeal; + this.experimental = experimental; } /** @@ -311,7 +316,13 @@ export class StagehandActHandler { domSettleTimeoutMs?: number, ) { const xpath = rawXPath.replace(/^xpath=/i, "").trim(); - const locator = deepLocator(this.stagehandPage.page, xpath).first(); + let locator; + if (this.experimental) { + locator = await deepLocatorWithShadow(this.stagehandPage.page, xpath); + } else { + locator = deepLocator(this.stagehandPage.page, xpath).first(); + } + const initialUrl = this.stagehandPage.page.url(); this.logger({ diff --git a/lib/handlers/extractHandler.ts b/lib/handlers/extractHandler.ts index 8131e78e7..2777adf0c 100644 --- a/lib/handlers/extractHandler.ts +++ b/lib/handlers/extractHandler.ts @@ -18,12 +18,14 @@ export class StagehandExtractHandler { private readonly stagehandPage: StagehandPage; private readonly logger: (logLine: LogLine) => void; private readonly userProvidedInstructions?: string; + private readonly experimental: boolean; constructor({ stagehand, logger, stagehandPage, userProvidedInstructions, + experimental, }: { stagehand: Stagehand; logger: (message: { @@ -34,11 +36,13 @@ export class StagehandExtractHandler { }) => void; stagehandPage: StagehandPage; userProvidedInstructions?: string; + experimental: boolean; }) { this.stagehand = stagehand; this.logger = logger; this.stagehandPage = stagehandPage; this.userProvidedInstructions = userProvidedInstructions; + this.experimental = experimental; } public async extract({ @@ -97,7 +101,11 @@ export class StagehandExtractHandler { domSettleTimeoutMs?: number, ): Promise<{ page_text?: string }> { await this.stagehandPage._waitForSettledDom(domSettleTimeoutMs); - const tree = await getAccessibilityTree(this.stagehandPage, this.logger); + const tree = await getAccessibilityTree( +
this.experimental, + this.stagehandPage, + this.logger, + ); this.logger({ category: "extraction", message: "Getting accessibility tree data", @@ -147,6 +155,7 @@ export class StagehandExtractHandler { discoveredIframes, } = await (iframes ? getAccessibilityTreeWithFrames( + this.experimental, this.stagehandPage, this.logger, targetXpath, @@ -156,14 +165,17 @@ export class StagehandExtractHandler { combinedXpathMap: {} as Record, discoveredIframes: [] as undefined, })) - : getAccessibilityTree(this.stagehandPage, this.logger, targetXpath).then( - ({ simplified, idToUrl, iframes: frameNodes }) => ({ - combinedTree: simplified, - combinedUrlMap: idToUrl as Record, - combinedXpathMap: {} as Record, - discoveredIframes: frameNodes, - }), - )); + : getAccessibilityTree( + this.experimental, + this.stagehandPage, + this.logger, + targetXpath, + ).then(({ simplified, idToUrl, iframes: frameNodes }) => ({ + combinedTree: simplified, + combinedUrlMap: idToUrl as Record, + combinedXpathMap: {} as Record, + discoveredIframes: frameNodes, + }))); this.logger({ category: "extraction", diff --git a/lib/handlers/handlerUtils/actHandlerUtils.ts b/lib/handlers/handlerUtils/actHandlerUtils.ts index de34519c0..fdd760adb 100644 --- a/lib/handlers/handlerUtils/actHandlerUtils.ts +++ b/lib/handlers/handlerUtils/actHandlerUtils.ts @@ -3,10 +3,181 @@ import { PlaywrightCommandException } from "../../../types/playwright"; import { StagehandPage } from "../../StagehandPage"; import { Logger } from "../../../types/log"; import { MethodHandlerContext } from "@/types/act"; -import { StagehandClickError } from "@/types/stagehandErrors"; +import { + StagehandClickError, + StagehandShadowRootMissingError, + StagehandShadowSegmentEmptyError, + StagehandShadowSegmentNotFoundError, +} from "@/types/stagehandErrors"; const IFRAME_STEP_RE = /^iframe(\[[^\]]+])?$/i; +function stepToCss(step: string): string { + const m = step.match(/^([a-zA-Z*][\w-]*)(?:\[(\d+)])?$/); + if (!m) return step; + const [, 
tag, idxRaw] = m; + const idx = idxRaw ? Number(idxRaw) : null; + if (tag === "*") return idx ? `*:nth-child(${idx})` : `*`; + return idx ? `${tag}:nth-of-type(${idx})` : tag; +} + +const buildDirect = (steps: string[]) => steps.map(stepToCss).join(" > "); +const buildDesc = (steps: string[]) => steps.map(stepToCss).join(" "); + +/** Resolve one contiguous shadow segment and return a stable Locator. */ +async function resolveShadowSegment( + hostLoc: Locator, + shadowSteps: string[], + attr = "data-__stagehand-id", + timeout = 1500, +): Promise { + const direct = buildDirect(shadowSteps); + const desc = buildDesc(shadowSteps); + + type Result = { id: string | null; noRoot: boolean }; + + const { id, noRoot } = await hostLoc.evaluate< + Result, + { direct: string; desc: string; attr: string; timeout: number } + >( + (host, { direct, desc, attr, timeout }) => { + interface StagehandClosedAccess { + getClosedRoot?: (h: Element) => ShadowRoot | undefined; + } + const backdoor = ( + window as Window & { + __stagehand__?: StagehandClosedAccess; + } + ).__stagehand__; + + const root = + (host as HTMLElement).shadowRoot ?? backdoor?.getClosedRoot?.(host); + if (!root) return { id: null, noRoot: true }; + + const tryFind = () => + (root.querySelector(direct) as Element | null) ?? 
+ (root.querySelector(desc) as Element | null); + + return new Promise((resolve) => { + const mark = (el: Element): Result => { + let v = el.getAttribute(attr); + if (!v) { + v = + "sh_" + + Math.random().toString(36).slice(2) + + Date.now().toString(36); + el.setAttribute(attr, v); + } + return { id: v, noRoot: false }; + }; + + const first = tryFind(); + if (first) return resolve(mark(first)); + + const start = Date.now(); + const tick = () => { + const el = tryFind(); + if (el) return resolve(mark(el)); + if (Date.now() - start >= timeout) + return resolve({ id: null, noRoot: false }); + setTimeout(tick, 50); + }; + tick(); + }); + }, + { direct, desc, attr, timeout }, + ); + + if (noRoot) { + throw new StagehandShadowRootMissingError( + `segment='${shadowSteps.join("/")}'`, + ); + } + if (!id) { + throw new StagehandShadowSegmentNotFoundError(shadowSteps.join("/")); + } + + return hostLoc.locator(`stagehand=${id}`); +} + +export async function deepLocatorWithShadow( + root: Page | FrameLocator, + xpath: string, +): Promise { + // 1 ─ prepend with slash if not already included + if (!xpath.startsWith("/")) xpath = "/" + xpath; + const tokens = xpath.split("/"); // keep "" from "//" + + let ctx: Page | FrameLocator | Locator = root; + let buffer: string[] = []; + let elementScoped = false; + + const xp = () => (elementScoped ? 
"xpath=./" : "xpath=/"); + + const flushIntoFrame = () => { + if (!buffer.length) return; + ctx = (ctx as Page | FrameLocator | Locator).frameLocator( + xp() + buffer.join("/"), + ); + buffer = []; + elementScoped = false; + }; + + const flushIntoLocator = () => { + if (!buffer.length) return; + ctx = (ctx as Page | FrameLocator | Locator).locator( + xp() + buffer.join("/"), + ); + buffer = []; + elementScoped = true; + }; + + for (let i = 1; i < tokens.length; i++) { + const step = tokens[i]; + + // Shadow hop: “//” + if (step === "") { + flushIntoLocator(); + + // collect full shadow segment until next hop/iframe/end + const seg: string[] = []; + let j = i + 1; + for (; j < tokens.length; j++) { + const t = tokens[j]; + if (t === "" || IFRAME_STEP_RE.test(t)) break; + seg.push(t); + } + if (!seg.length) throw new StagehandShadowSegmentEmptyError(); + + // resolve inside the shadow root + ctx = await resolveShadowSegment(ctx as Locator, seg); + elementScoped = true; + + i = j - 1; + continue; + } + + // Normal DOM step + buffer.push(step); + + // iframe hop → descend into frame + if (IFRAME_STEP_RE.test(step)) flushIntoFrame(); + } + + if (buffer.length === 0) { + // If we’re already element-scoped, we already have the final Locator. + if (elementScoped) return ctx as Locator; + + // Otherwise (page/frame scoped), return the root element of the current doc. + return (ctx as Page | FrameLocator).locator("xpath=/"); + } + + // Otherwise, resolve the remaining buffered steps. 
+ return (ctx as Page | FrameLocator | Locator).locator( + xp() + buffer.join("/"), + ); +} + export function deepLocator(root: Page | FrameLocator, xpath: string): Locator { // 1 ─ prepend with slash if not already included if (!xpath.startsWith("/")) xpath = "/" + xpath; diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index c14e4d6ea..5a3a3e95a 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -14,6 +14,7 @@ export class StagehandObserveHandler { private readonly stagehand: Stagehand; private readonly logger: (logLine: LogLine) => void; private readonly stagehandPage: StagehandPage; + private readonly experimental: boolean; private readonly userProvidedInstructions?: string; constructor({ @@ -21,16 +22,19 @@ export class StagehandObserveHandler { logger, stagehandPage, userProvidedInstructions, + experimental, }: { stagehand: Stagehand; logger: (logLine: LogLine) => void; stagehandPage: StagehandPage; userProvidedInstructions?: string; + experimental: boolean; }) { this.stagehand = stagehand; this.logger = logger; this.stagehandPage = stagehandPage; this.userProvidedInstructions = userProvidedInstructions; + this.experimental = experimental; } public async observe({ @@ -88,21 +92,25 @@ export class StagehandObserveHandler { level: 1, }); const { combinedTree, combinedXpathMap, discoveredIframes } = await (iframes - ? getAccessibilityTreeWithFrames(this.stagehandPage, this.logger).then( - ({ combinedTree, combinedXpathMap }) => ({ - combinedTree, - combinedXpathMap, - discoveredIframes: [] as AccessibilityNode[], - }), - ) - : getAccessibilityTree(this.stagehandPage, this.logger).then( - ({ simplified, xpathMap, idToUrl, iframes: frameNodes }) => ({ - combinedTree: simplified, - combinedXpathMap: xpathMap, - combinedUrlMap: idToUrl, - discoveredIframes: frameNodes, - }), - )); + ? 
getAccessibilityTreeWithFrames( + this.experimental, + this.stagehandPage, + this.logger, + ).then(({ combinedTree, combinedXpathMap }) => ({ + combinedTree, + combinedXpathMap, + discoveredIframes: [] as AccessibilityNode[], + })) + : getAccessibilityTree( + this.experimental, + this.stagehandPage, + this.logger, + ).then(({ simplified, xpathMap, idToUrl, iframes: frameNodes }) => ({ + combinedTree: simplified, + combinedXpathMap: xpathMap, + combinedUrlMap: idToUrl, + discoveredIframes: frameNodes, + }))); // No screenshot or vision-based annotation is performed const observationResponse = await observe({ diff --git a/types/stagehandErrors.ts b/types/stagehandErrors.ts index fdc5b4ea7..02819cd88 100644 --- a/types/stagehandErrors.ts +++ b/types/stagehandErrors.ts @@ -229,3 +229,27 @@ export class StagehandInitError extends StagehandError { super(message); } } + +export class StagehandShadowRootMissingError extends StagehandError { + constructor(detail?: string) { + super( + `No shadow root present on the resolved host` + + (detail ? `: ${detail}` : ""), + ); + } +} + +export class StagehandShadowSegmentEmptyError extends StagehandError { + constructor() { + super(`Empty selector segment after shadow-DOM hop ("//")`); + } +} + +export class StagehandShadowSegmentNotFoundError extends StagehandError { + constructor(segment: string, hint?: string) { + super( + `Shadow segment '${segment}' matched no element inside shadow root` + + (hint ? ` ${hint}` : ""), + ); + } +}