From cf777aeccab6f10f92c48248d11edebe09b333e3 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Fri, 8 Aug 2025 16:59:26 -0700 Subject: [PATCH 01/13] generate correct IDs for shadow DOM elements --- lib/a11y/utils.ts | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/a11y/utils.ts b/lib/a11y/utils.ts index e603961b..276fa661 100644 --- a/lib/a11y/utils.ts +++ b/lib/a11y/utils.ts @@ -195,6 +195,9 @@ export async function buildBackendIdMaps( const stack: StackEntry[] = [{ node: startNode, path: "", fid: rootFid }]; const seen = new Set(); + const joinStep = (base: string, step: string): string => + base.endsWith("//") ? `${base}${step}` : `${base}/${step}`; + while (stack.length) { const { node, path, fid } = stack.pop()!; @@ -212,6 +215,16 @@ export async function buildBackendIdMaps( stack.push({ node: node.contentDocument, path: "", fid: childFid }); } + if (node.shadowRoots?.length) { + for (const shadowRoot of node.shadowRoots) { + stack.push({ + node: shadowRoot, + path: `${path}//`, + fid, + }); + } + } + // push children const kids = node.children ?? []; if (kids.length) { @@ -234,7 +247,7 @@ export async function buildBackendIdMaps( for (let i = kids.length - 1; i >= 0; i--) { stack.push({ node: kids[i]!, - path: `${path}/${segs[i]}`, + path: joinStep(path, segs[i]!), fid, }); } From 624fadc0130f758eb14d054fdc43f4891e7974a8 Mon Sep 17 00:00:00 2001 From: Sean McGuire Date: Fri, 8 Aug 2025 18:49:25 -0700 Subject: [PATCH 02/13] add custom locator engine & shadow dom traversal logic --- lib/StagehandPage.ts | 143 ++++++++++++++- lib/dom/global.d.ts | 8 +- lib/dom/process.ts | 60 +++++++ lib/handlers/actHandler.ts | 2 +- lib/handlers/handlerUtils/actHandlerUtils.ts | 174 +++++++++++++++++-- types/stagehandErrors.ts | 24 +++ 6 files changed, 392 insertions(+), 19 deletions(-) diff --git a/lib/StagehandPage.ts b/lib/StagehandPage.ts index 09827b40..b7dd6692 100644 --- a/lib/StagehandPage.ts +++ b/lib/StagehandPage.ts @@ -1,4 +1,10 @@ -import type { CDPSession, Page as PlaywrightPage, Frame } from "playwright"; +import type { + CDPSession, + Page as PlaywrightPage, + Frame, + ElementHandle, +} from "playwright"; +import { selectors } from "playwright"; import { z } from "zod/v3"; import { Page, defaultExtractSchema } from "../types/page"; import { @@ -29,6 +35,7 @@ import { import { StagehandAPIError } from "@/types/stagehandApiErrors"; import { scriptContent } from "@/lib/dom/build/scriptContent"; import type { Protocol } from "devtools-protocol"; +import { StagehandBackdoor } from "@/lib/dom/global"; async function getCurrentRootFrameId(session: CDPSession): Promise { const { frameTree } = (await session.send( @@ -37,6 +44,9 @@ async function getCurrentRootFrameId(session: CDPSession): Promise { return frameTree.frame.id; } +/** ensure we register the custom selector only once per process */ +let stagehandSelectorRegistered = false; + export class StagehandPage { private stagehand: Stagehand; private rawPage: PlaywrightPage; @@ -188,6 +198,113 @@ ${scriptContent} \ } } + /** Register the custom selector engine that pierces open/closed shadow roots. */ + private async ensureStagehandSelectorEngine(): Promise { + if (stagehandSelectorRegistered) return; + stagehandSelectorRegistered = true; + + await selectors.register("stagehand", () => { + type Backdoor = { + getClosedRoot?: (host: Element) => ShadowRoot | undefined; + }; + + function parseSelector(input: string): { name: string; value: string } { + // Accept either: "abc123" → uses DEFAULT_ATTR + // or explicitly: "data-__stagehand-id=abc123" + const raw = input.trim(); + const eq = raw.indexOf("="); + if (eq === -1) { + return { + name: "data-__stagehand-id", + value: raw.replace(/^["']|["']$/g, ""), + }; + } + const name = raw.slice(0, eq).trim(); + const value = raw + .slice(eq + 1) + .trim() + .replace(/^["']|["']$/g, ""); + return { name, value }; + } + + function pushChildren(node: Node, stack: Node[]): void { + if (node.nodeType === Node.DOCUMENT_NODE) { + const de = (node as Document).documentElement; + if (de) stack.push(de); + return; + } + + if (node.nodeType === Node.DOCUMENT_FRAGMENT_NODE) { + const frag = node as DocumentFragment; + const hc = frag.children as HTMLCollection | undefined; + if (hc && hc.length) { + for (let i = hc.length - 1; i >= 0; i--) + stack.push(hc[i] as Element); + } else { + const cn = frag.childNodes; + for (let i = cn.length - 1; i >= 0; i--) stack.push(cn[i]); + } + return; + } + + if (node.nodeType === Node.ELEMENT_NODE) { + const el = node as Element; + for (let i = el.children.length - 1; i >= 0; i--) + stack.push(el.children[i]); + } + } + + function* traverseAllTrees( + start: Node, + ): Generator { + const backdoor = window.__stagehand__ as Backdoor | undefined; + const stack: Node[] = []; + + if (start.nodeType === Node.DOCUMENT_NODE) { + const de = (start as Document).documentElement; + if (de) stack.push(de); + } else { + stack.push(start); + } + + while (stack.length) { + const node = stack.pop()!; + if (node.nodeType === Node.ELEMENT_NODE) { + const el = node as Element; + yield el; + + // open shadow + const open = el.shadowRoot as ShadowRoot | null; + if (open) stack.push(open); + + // closed shadow via backdoor + const closed = backdoor?.getClosedRoot?.(el); + if (closed) stack.push(closed); + } + pushChildren(node, stack); + } + } + + return { + query(root: Node, selector: string): Element | null { + const { name, value } = parseSelector(selector); + for (const el of traverseAllTrees(root)) { + if (el.getAttribute(name) === value) return el; + } + return null; + }, + queryAll(root: Node, selector: string): Element[] { + const { name, value } = parseSelector(selector); + const out: Element[] = []; + for (const el of traverseAllTrees(root)) { + if (el.getAttribute(name) === value) out.push(el); + } + return out; + }, + }; + }); + } + /** * Waits for a captcha to be solved when using Browserbase environment. * @@ -410,6 +527,11 @@ ${scriptContent} \ this.intContext.registerFrameId(rootId, this); this.intPage = new Proxy(page, handler) as unknown as Page; + + // Ensure our backdoor and selector engine are ready up front + await this.ensureStagehandScript(); + await this.ensureStagehandSelectorEngine(); + this.initialized = true; return this; } catch (err: unknown) { @@ -999,4 +1121,23 @@ ${scriptContent} \ ): Promise { await this.sendCDP(`${domain}.disable`, {}, target); } + + async getShadowRootHandle( + this: StagehandPage, + host: ElementHandle, + ): Promise | null> { + const h = await host.evaluateHandle((el: Element): ShadowRoot | null => { + // Open root? + if ((el as HTMLElement).shadowRoot) + return (el as HTMLElement).shadowRoot!; + // Closed root kept in our isolated world + return ( + ( + window as Window & { __stagehand__?: StagehandBackdoor } + ).__stagehand__?.getClosedRoot(el) ?? null + ); + }); + + return h.asElement() as ElementHandle | null; + } } diff --git a/lib/dom/global.d.ts b/lib/dom/global.d.ts index 863aeb77..8425e29b 100644 --- a/lib/dom/global.d.ts +++ b/lib/dom/global.d.ts @@ -1,4 +1,9 @@ -export {}; +export interface StagehandBackdoor { + /** Closed shadow-root accessors */ + getClosedRoot(host: Element): ShadowRoot | undefined; + queryClosed(host: Element, selector: string): Element[]; + xpathClosed(host: Element, xpath: string): Node[]; +} declare global { interface Window { __stagehandInjected?: boolean; @@ -8,5 +13,6 @@ declare global { getScrollableElementXpaths: (topN?: number) => Promise; getNodeFromXpath: (xpath: string) => Node | null; waitForElementScrollEnd: (element: HTMLElement) => Promise; + readonly __stagehand__?: StagehandBackdoor; } } diff --git a/lib/dom/process.ts b/lib/dom/process.ts index 1fce2b19..8023336d 100644 --- a/lib/dom/process.ts +++ b/lib/dom/process.ts @@ -73,6 +73,66 @@ export async function getScrollableElementXpaths( return xpaths; } +(() => { + // Map for every root created in closed mode + const closedRoots: WeakMap = new WeakMap(); + + // Preserve the original method + const nativeAttachShadow = Element.prototype.attachShadow; + + // Intercept *before any page script runs* + Element.prototype.attachShadow = function (init: ShadowRootInit): ShadowRoot { + const root = nativeAttachShadow.call(this, init); + if (init.mode === "closed") closedRoots.set(this, root); + return root; + }; + + interface StagehandBackdoor { + /** Get the real ShadowRoot (undefined if host has none / is open) */ + getClosedRoot(host: Element): ShadowRoot | undefined; + + /** CSS‑selector search inside that root */ + queryClosed(host: Element, selector: string): Element[]; + + /** XPath search inside that root (relative XPath supported) */ + xpathClosed(host: Element, xpath: string): Node[]; + } + + const backdoor: StagehandBackdoor = { + getClosedRoot: (host) => closedRoots.get(host), + + queryClosed: (host, selector) => { + const root = closedRoots.get(host); + return root ? Array.from(root.querySelectorAll(selector)) : []; + }, + + xpathClosed: (host, xp) => { + const root = closedRoots.get(host); + if (!root) return []; + const it = document.evaluate( + xp, + root, + null, + XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, + null, + ); + const out: Node[] = []; + for (let i = 0; i < it.snapshotLength; ++i) { + const n = it.snapshotItem(i); + if (n) out.push(n); + } + return out; + }, + }; + + Object.defineProperty(window, "__stagehand__", { + value: backdoor, + enumerable: false, + writable: false, + configurable: false, + }); +})(); + window.getScrollableElementXpaths = getScrollableElementXpaths; window.getNodeFromXpath = getNodeFromXpath; window.waitForElementScrollEnd = waitForElementScrollEnd; diff --git a/lib/handlers/actHandler.ts b/lib/handlers/actHandler.ts index ba260b5d..734ee371 100644 --- a/lib/handlers/actHandler.ts +++ b/lib/handlers/actHandler.ts @@ -311,7 +311,7 @@ export class StagehandActHandler { domSettleTimeoutMs?: number, ) { const xpath = rawXPath.replace(/^xpath=/i, "").trim(); - const locator = deepLocator(this.stagehandPage.page, xpath).first(); + const locator = await deepLocator(this.stagehandPage.page, xpath); const initialUrl = this.stagehandPage.page.url(); this.logger({ diff --git a/lib/handlers/handlerUtils/actHandlerUtils.ts b/lib/handlers/handlerUtils/actHandlerUtils.ts index 1c32cf7f..8e2a7d32 100644 --- a/lib/handlers/handlerUtils/actHandlerUtils.ts +++ b/lib/handlers/handlerUtils/actHandlerUtils.ts @@ -4,37 +4,179 @@ import { StagehandPage } from "../../StagehandPage"; import { getNodeFromXpath } from "@/lib/dom/utils"; import { Logger } from "../../../types/log"; import { MethodHandlerContext } from "@/types/act"; -import { StagehandClickError } from "@/types/stagehandErrors"; +import { + StagehandClickError, + StagehandShadowRootMissingError, + StagehandShadowSegmentEmptyError, + StagehandShadowSegmentNotFoundError, +} from "@/types/stagehandErrors"; const IFRAME_STEP_RE = /^iframe(\[[^\]]+])?$/i; -export function deepLocator(root: Page | FrameLocator, xpath: string): Locator { +function stepToCss(step: string): string { + const m = step.match(/^([a-zA-Z*][\w-]*)(?:\[(\d+)])?$/); + if (!m) return step; + const [, tag, idxRaw] = m; + const idx = idxRaw ? Number(idxRaw) : null; + if (tag === "*") return idx ? `*:nth-child(${idx})` : `*`; + return idx ? `${tag}:nth-of-type(${idx})` : tag; +} + +const buildDirect = (steps: string[]) => steps.map(stepToCss).join(" > "); +const buildDesc = (steps: string[]) => steps.map(stepToCss).join(" "); + +/** Resolve one contiguous shadow segment and return a stable Locator. */ +async function resolveShadowSegment( + hostLoc: Locator, + shadowSteps: string[], + attr = "data-__stagehand-id", + timeout = 1500, +): Promise { + const direct = buildDirect(shadowSteps); + const desc = buildDesc(shadowSteps); + + type Result = { id: string | null; noRoot: boolean }; + + const { id, noRoot } = await hostLoc.evaluate< + Result, + { direct: string; desc: string; attr: string; timeout: number } + >( + (host, { direct, desc, attr, timeout }) => { + interface StagehandClosedAccess { + getClosedRoot?: (h: Element) => ShadowRoot | undefined; + } + const backdoor = ( + window as Window & { + __stagehand__?: StagehandClosedAccess; + } + ).__stagehand__; + + const root = + (host as HTMLElement).shadowRoot ?? backdoor?.getClosedRoot?.(host); + if (!root) return { id: null, noRoot: true }; + + const tryFind = () => + (root.querySelector(direct) as Element | null) ?? + (root.querySelector(desc) as Element | null); + + return new Promise((resolve) => { + const mark = (el: Element): Result => { + let v = el.getAttribute(attr); + if (!v) { + v = + "sh_" + + Math.random().toString(36).slice(2) + + Date.now().toString(36); + el.setAttribute(attr, v); + } + return { id: v, noRoot: false }; + }; + + const first = tryFind(); + if (first) return resolve(mark(first)); + + const start = Date.now(); + const tick = () => { + const el = tryFind(); + if (el) return resolve(mark(el)); + if (Date.now() - start >= timeout) + return resolve({ id: null, noRoot: false }); + setTimeout(tick, 50); + }; + tick(); + }); + }, + { direct, desc, attr, timeout }, + ); + + if (noRoot) { + throw new StagehandShadowRootMissingError( + `segment='${shadowSteps.join("/")}'`, + ); + } + if (!id) { + throw new StagehandShadowSegmentNotFoundError(shadowSteps.join("/")); + } + + return hostLoc.locator(`stagehand=${id}`); +} + +export async function deepLocator( + root: Page | FrameLocator, + xpath: string, +): Promise { // 1 ─ prepend with slash if not already included if (!xpath.startsWith("/")) xpath = "/" + xpath; + const tokens = xpath.split("/"); // keep "" from "//" - // 2 ─ split into steps, accumulate until we hit an iframe step - const steps = xpath.split("/").filter(Boolean); // tokens - let ctx: Page | FrameLocator = root; + let ctx: Page | FrameLocator | Locator = root; let buffer: string[] = []; + let elementScoped = false; + + const xp = () => (elementScoped ? "xpath=./" : "xpath=/"); const flushIntoFrame = () => { - if (buffer.length === 0) return; - const selector = "xpath=/" + buffer.join("/"); - ctx = (ctx as Page | FrameLocator).frameLocator(selector); + if (!buffer.length) return; + ctx = (ctx as Page | FrameLocator | Locator).frameLocator( + xp() + buffer.join("/"), + ); buffer = []; + elementScoped = false; }; - for (const step of steps) { - buffer.push(step); - if (IFRAME_STEP_RE.test(step)) { - // we've included the