Skip to content

Commit 40636b1

Browse files
committed
put shadow dom support behind experimental flag
1 parent c8892b6 commit 40636b1

File tree

6 files changed

+147
-36
lines changed

6 files changed

+147
-36
lines changed

lib/StagehandPage.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,18 +123,21 @@ export class StagehandPage {
123123
logger: this.stagehand.logger,
124124
stagehandPage: this,
125125
selfHeal: this.stagehand.selfHeal,
126+
experimental: this.stagehand.experimental,
126127
});
127128
this.extractHandler = new StagehandExtractHandler({
128129
stagehand: this.stagehand,
129130
logger: this.stagehand.logger,
130131
stagehandPage: this,
131132
userProvidedInstructions,
133+
experimental: this.stagehand.experimental,
132134
});
133135
this.observeHandler = new StagehandObserveHandler({
134136
stagehand: this.stagehand,
135137
logger: this.stagehand.logger,
136138
stagehandPage: this,
137139
userProvidedInstructions,
140+
experimental: this.stagehand.experimental,
138141
});
139142
}
140143
}

lib/a11y/utils.ts

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -118,11 +118,13 @@ const lc = (raw: string): string => {
118118
/**
119119
* Build mappings from CDP backendNodeIds to HTML tag names and relative XPaths.
120120
*
121+
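* @param experimental - When true, shadow roots are also traversed (experimental shadow DOM support) while locating the iframe node and building the maps; when false, shadow roots are skipped.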
121122
* @param sp - The StagehandPage wrapper for Playwright and CDP calls.
122123
* @param targetFrame - Optional Playwright.Frame whose DOM subtree to map; defaults to main frame.
123124
* @returns A Promise resolving to BackendIdMaps containing tagNameMap and xpathMap.
124125
*/
125126
export async function buildBackendIdMaps(
127+
experimental: boolean,
126128
sp: StagehandPage,
127129
targetFrame?: Frame,
128130
): Promise<BackendIdMaps> {
@@ -169,14 +171,22 @@ export async function buildBackendIdMaps(
169171

170172
let iframeNode: DOMNode | undefined;
171173
const locate = (n: DOMNode): boolean => {
172-
if (n.backendNodeId === backendNodeId) {
173-
iframeNode = n;
174-
return true;
174+
if (experimental) {
175+
if (n.backendNodeId === backendNodeId) {
176+
iframeNode = n;
177+
return true;
178+
}
179+
if (n.shadowRoots?.some(locate)) return true;
180+
if (n.children?.some(locate)) return true;
181+
if (n.contentDocument && locate(n.contentDocument)) return true;
182+
return false;
183+
} else {
184+
if (n.backendNodeId === backendNodeId) return (iframeNode = n), true;
185+
return (
186+
(n.children?.some(locate) ?? false) ||
187+
(n.contentDocument ? locate(n.contentDocument) : false)
188+
);
175189
}
176-
if (n.shadowRoots?.some(locate)) return true;
177-
if (n.children?.some(locate)) return true;
178-
if (n.contentDocument && locate(n.contentDocument)) return true;
179-
return false;
180190
};
181191

182192
if (!locate(root) || !iframeNode?.contentDocument) {
@@ -218,7 +228,7 @@ export async function buildBackendIdMaps(
218228
stack.push({ node: node.contentDocument, path: "", fid: childFid });
219229
}
220230

221-
if (node.shadowRoots?.length) {
231+
if (node.shadowRoots?.length && experimental) {
222232
for (const shadowRoot of node.shadowRoots) {
223233
stack.push({
224234
node: shadowRoot,
@@ -502,13 +512,15 @@ export async function getCDPFrameId(
502512
* @returns A Promise resolving to a TreeResult with the hierarchical AX tree and related metadata.
503513
*/
504514
export async function getAccessibilityTree(
515+
experimental: boolean,
505516
stagehandPage: StagehandPage,
506517
logger: (log: LogLine) => void,
507518
selector?: string,
508519
targetFrame?: Frame,
509520
): Promise<TreeResult> {
510521
// 0. DOM helpers (maps, xpath)
511522
const { tagNameMap, xpathMap } = await buildBackendIdMaps(
523+
experimental,
512524
stagehandPage,
513525
targetFrame,
514526
);
@@ -717,7 +729,7 @@ export async function getFrameRootBackendNodeId(
717729
* @param frame - The Playwright.Frame whose iframe element to locate.
718730
* @returns A Promise resolving to the XPath of the iframe element, or "/" if no frame provided.
719731
*/
720-
export async function getFrameRootXpath(
732+
export async function getFrameRootXpathWithShadow(
721733
frame: Frame | undefined,
722734
): Promise<string> {
723735
// Return root path when no frame context is provided
@@ -768,6 +780,34 @@ export async function getFrameRootXpath(
768780
});
769781
}
770782

783+
/**
 * Compute the absolute XPath of the `<iframe>` element that hosts `frame`,
 * evaluated inside the embedding document.
 *
 * The path is built by walking `parentElement` from the iframe element up to
 * the document root, so it does not cross shadow-root boundaries (an element
 * whose parent is a shadow root ends the walk).
 *
 * @param frame - The Playwright.Frame whose iframe element to locate.
 * @returns A Promise resolving to the XPath of the iframe element, or "/" if no frame provided.
 */
export async function getFrameRootXpath(
  frame: Frame | undefined,
): Promise<string> {
  // Return root path when no frame context is provided
  if (!frame) {
    return "/";
  }
  // Obtain the element handle of the iframe in the embedding document
  const handle = await frame.frameElement();
  try {
    // Evaluate the element's absolute XPath within the page context
    return await handle.evaluate((node: Element) => {
      // 1-based position of `el` among preceding siblings with the same tag
      const pos = (el: Element) => {
        let i = 1;
        for (
          let sib = el.previousElementSibling;
          sib;
          sib = sib.previousElementSibling
        )
          if (sib.tagName === el.tagName) i += 1;
        return i;
      };
      const segs: string[] = [];
      // Walk upwards, prepending one `tag[index]` step per ancestor
      for (let el: Element | null = node; el; el = el.parentElement)
        segs.unshift(`${el.tagName.toLowerCase()}[${pos(el)}]`);
      return `/${segs.join("/")}`;
    });
  } finally {
    // Release the handle so it does not stay referenced for the lifetime of
    // the page. Cleanup is best-effort: a failure to dispose (e.g. the page
    // already closed) must not mask the evaluate result or its error.
    await handle.dispose().catch(() => undefined);
  }
}
810+
771811
/**
772812
* Inject simplified subtree outlines into the main frame outline for nested iframes.
773813
* Walks the main tree text, looks for EncodedId labels, and inserts matching subtrees.
@@ -872,6 +912,7 @@ export function injectSubtrees(
872912
* @returns A Promise resolving to CombinedA11yResult with combined tree text, xpath map, and URL map.
873913
*/
874914
export async function getAccessibilityTreeWithFrames(
915+
experimental: boolean,
875916
stagehandPage: StagehandPage,
876917
logger: (l: LogLine) => void,
877918
rootXPath?: string,
@@ -919,6 +960,7 @@ export async function getAccessibilityTreeWithFrames(
919960

920961
try {
921962
const res = await getAccessibilityTree(
963+
experimental,
922964
stagehandPage,
923965
logger,
924966
selector,
@@ -931,7 +973,13 @@ export async function getAccessibilityTreeWithFrames(
931973
? null
932974
: await getFrameRootBackendNodeId(stagehandPage, frame);
933975

934-
const frameXpath = frame === main ? "/" : await getFrameRootXpath(frame);
976+
let frameXpath;
977+
if (experimental) {
978+
frameXpath =
979+
frame === main ? "/" : await getFrameRootXpathWithShadow(frame);
980+
} else {
981+
frameXpath = frame === main ? "/" : await getFrameRootXpath(frame);
982+
}
935983

936984
// Resolve the CDP frameId for this Playwright Frame (undefined for main)
937985
const frameId = await getCDPFrameId(stagehandPage, frame);

lib/handlers/actHandler.ts

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import {
1818
methodHandlerMap,
1919
fallbackLocatorMethod,
2020
deepLocator,
21+
deepLocatorWithShadow,
2122
} from "./handlerUtils/actHandlerUtils";
2223
import { StagehandObserveHandler } from "@/lib/handlers/observeHandler";
2324
import { StagehandInvalidArgumentError } from "@/types/stagehandErrors";
@@ -30,19 +31,23 @@ export class StagehandActHandler {
3031
private readonly stagehandPage: StagehandPage;
3132
private readonly logger: (logLine: LogLine) => void;
3233
private readonly selfHeal: boolean;
34+
private readonly experimental: boolean;
3335

3436
constructor({
3537
logger,
3638
stagehandPage,
3739
selfHeal,
40+
experimental,
3841
}: {
3942
logger: (logLine: LogLine) => void;
4043
stagehandPage: StagehandPage;
4144
selfHeal: boolean;
45+
experimental: boolean;
4246
}) {
4347
this.logger = logger;
4448
this.stagehandPage = stagehandPage;
4549
this.selfHeal = selfHeal;
50+
this.experimental = experimental;
4651
}
4752

4853
/**
@@ -311,7 +316,13 @@ export class StagehandActHandler {
311316
domSettleTimeoutMs?: number,
312317
) {
313318
const xpath = rawXPath.replace(/^xpath=/i, "").trim();
314-
const locator = await deepLocator(this.stagehandPage.page, xpath);
319+
let locator;
320+
if (this.experimental) {
321+
locator = await deepLocatorWithShadow(this.stagehandPage.page, xpath);
322+
} else {
323+
locator = deepLocator(this.stagehandPage.page, xpath);
324+
}
325+
315326
const initialUrl = this.stagehandPage.page.url();
316327

317328
this.logger({

lib/handlers/extractHandler.ts

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,14 @@ export class StagehandExtractHandler {
1818
private readonly stagehandPage: StagehandPage;
1919
private readonly logger: (logLine: LogLine) => void;
2020
private readonly userProvidedInstructions?: string;
21+
private readonly experimental: boolean;
2122

2223
constructor({
2324
stagehand,
2425
logger,
2526
stagehandPage,
2627
userProvidedInstructions,
28+
experimental,
2729
}: {
2830
stagehand: Stagehand;
2931
logger: (message: {
@@ -34,11 +36,13 @@ export class StagehandExtractHandler {
3436
}) => void;
3537
stagehandPage: StagehandPage;
3638
userProvidedInstructions?: string;
39+
experimental: boolean;
3740
}) {
3841
this.stagehand = stagehand;
3942
this.logger = logger;
4043
this.stagehandPage = stagehandPage;
4144
this.userProvidedInstructions = userProvidedInstructions;
45+
this.experimental = experimental;
4246
}
4347

4448
public async extract<T extends z.AnyZodObject>({
@@ -97,7 +101,11 @@ export class StagehandExtractHandler {
97101
domSettleTimeoutMs?: number,
98102
): Promise<{ page_text?: string }> {
99103
await this.stagehandPage._waitForSettledDom(domSettleTimeoutMs);
100-
const tree = await getAccessibilityTree(this.stagehandPage, this.logger);
104+
const tree = await getAccessibilityTree(
105+
this.experimental,
106+
this.stagehandPage,
107+
this.logger,
108+
);
101109
this.logger({
102110
category: "extraction",
103111
message: "Getting accessibility tree data",
@@ -147,6 +155,7 @@ export class StagehandExtractHandler {
147155
discoveredIframes,
148156
} = await (iframes
149157
? getAccessibilityTreeWithFrames(
158+
this.experimental,
150159
this.stagehandPage,
151160
this.logger,
152161
targetXpath,
@@ -156,14 +165,17 @@ export class StagehandExtractHandler {
156165
combinedXpathMap: {} as Record<EncodedId, string>,
157166
discoveredIframes: [] as undefined,
158167
}))
159-
: getAccessibilityTree(this.stagehandPage, this.logger, targetXpath).then(
160-
({ simplified, idToUrl, iframes: frameNodes }) => ({
161-
combinedTree: simplified,
162-
combinedUrlMap: idToUrl as Record<EncodedId, string>,
163-
combinedXpathMap: {} as Record<EncodedId, string>,
164-
discoveredIframes: frameNodes,
165-
}),
166-
));
168+
: getAccessibilityTree(
169+
this.experimental,
170+
this.stagehandPage,
171+
this.logger,
172+
targetXpath,
173+
).then(({ simplified, idToUrl, iframes: frameNodes }) => ({
174+
combinedTree: simplified,
175+
combinedUrlMap: idToUrl as Record<EncodedId, string>,
176+
combinedXpathMap: {} as Record<EncodedId, string>,
177+
discoveredIframes: frameNodes,
178+
})));
167179

168180
this.logger({
169181
category: "extraction",

lib/handlers/handlerUtils/actHandlerUtils.ts

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ async function resolveShadowSegment(
101101
return hostLoc.locator(`stagehand=${id}`);
102102
}
103103

104-
export async function deepLocator(
104+
export async function deepLocatorWithShadow(
105105
root: Page | FrameLocator,
106106
xpath: string,
107107
): Promise<Locator> {
@@ -179,6 +179,35 @@ export async function deepLocator(
179179
);
180180
}
181181

182+
/**
 * Resolve an XPath that may pass through one or more iframes into a
 * Playwright Locator.
 *
 * The XPath is split on "/" into steps. Steps are accumulated until one
 * matches IFRAME_STEP_RE; at that point the accumulated steps (including the
 * iframe step itself) are turned into a `frameLocator` and the search
 * continues inside that frame. Whatever steps remain after the last iframe
 * address the target inside the innermost context.
 *
 * @param root - The Page or FrameLocator to start resolving from.
 * @param xpath - The XPath to resolve; a leading "/" is added if missing.
 * @returns A Locator for the remaining steps within the innermost frame context.
 */
export function deepLocator(root: Page | FrameLocator, xpath: string): Locator {
  // Normalise to an absolute path without touching the caller's argument
  const absolute = xpath.startsWith("/") ? xpath : `/${xpath}`;

  let scope: Page | FrameLocator = root;
  let pending: string[] = [];

  for (const segment of absolute.split("/")) {
    // Empty tokens come from the leading slash and from "//" sequences
    if (!segment) continue;

    pending.push(segment);
    if (!IFRAME_STEP_RE.test(segment)) continue;

    // The iframe element itself is part of `pending`, so descend into it
    scope = scope.frameLocator(`xpath=/${pending.join("/")}`);
    pending = [];
  }

  // Steps left over after the last iframe locate the target inside `scope`
  return scope.locator(`xpath=/${pending.join("/")}`);
}
210+
182211
/**
183212
* A mapping of playwright methods that may be chosen by the LLM to their
184213
* implementation.

0 commit comments

Comments
 (0)