Skip to content

Commit 3d80421

Browse files
enable scrolling inside of iframes (#919)
# why

This PR addresses two issues, both related to scrolling inside iframes:

1. we are not calling `.evaluate` inside frames:
   - currently, the scrolling functionality that exists inside `performPlaywrightMethod` calls `page.evaluate`
   - this means that scrolling can only ever happen at the page level, not inside of iframes
   - inside each of these scroll-related helpers, we already have access to a chained locator that optionally points to an element inside an iframe
   - therefore, we should use this chained locator, and call `locator.evaluate`
2. for SPIFs (same-process iframes), we are not looking for scrollable elements in the correct execution context:
   - when we call `resolveObjectIdForXPath`, this executes in either the page-level execution context, or, for OOPIFs (out-of-process iframes), the frame-level execution context
   - this is problematic because SPIFs share the same CDP session as the root document, which means that we are responsible for specifying the execution context. Since we aren't doing this, `resolveObjectIdForXPath` only searches in the root document execution context, and can't see anything inside of the SPIF

# what changed

- to address issue number 1, I updated `scrollToNextChunk`, `scrollToPreviousChunk`, and `scrollElementToPercentage` to all use `locator.evaluate` instead of `page.evaluate`, which enables scrolling inside (and outside) of iframes
- to address issue number 2, I added a function `getFrameExecutionContextId` which creates an isolated world & returns a SPIF-scoped execution context
  - we use this downstream in `resolveObjectIdForXPath`, which guarantees that we are searching for scrollable elements in the correct execution context

# test plan

- added an eval for scrolling inside a same-process iframe
- `act` evals
- `targeted_extract` evals
- `observe` evals
- `extract` evals
1 parent 72d2683 commit 3d80421

File tree

5 files changed

+168
-65
lines changed

5 files changed

+168
-65
lines changed

.changeset/open-donkeys-shine.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": patch
3+
---
4+
5+
enable scrolling inside of iframes

evals/evals.config.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,10 @@
419419
"name": "agent/sign_in",
420420
"categories": ["agent"]
421421
},
422+
{
423+
"name": "iframe_scroll",
424+
"categories": ["act"]
425+
},
422426
{
423427
"name": "namespace_xpath",
424428
"categories": ["act"]

evals/tasks/iframe_scroll.ts

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import { EvalFunction } from "@/types/evals";
2+
3+
export const iframe_scroll: EvalFunction = async ({
4+
debugUrl,
5+
sessionUrl,
6+
stagehand,
7+
logger,
8+
}) => {
9+
try {
10+
await stagehand.page.goto(
11+
"https://browserbase.github.io/stagehand-eval-sites/sites/iframe-same-proc-scroll/",
12+
);
13+
await stagehand.page.act({
14+
action: "scroll down 50% inside the iframe",
15+
iframes: true,
16+
});
17+
18+
const frames = stagehand.page.frames();
19+
const frame = frames[1];
20+
21+
await new Promise((resolve) => setTimeout(resolve, 5000));
22+
23+
// Get the current scroll position and total scroll height
24+
const scrollInfo = await frame.evaluate(() => {
25+
return {
26+
scrollTop: window.scrollY + window.innerHeight / 2,
27+
scrollHeight: document.documentElement.scrollHeight,
28+
};
29+
});
30+
31+
const halfwayScroll = scrollInfo.scrollHeight / 2;
32+
const halfwayReached = Math.abs(scrollInfo.scrollTop - halfwayScroll) <= 1;
33+
const evaluationResult = halfwayReached
34+
? {
35+
_success: true,
36+
logs: logger.getLogs(),
37+
debugUrl,
38+
sessionUrl,
39+
}
40+
: {
41+
_success: false,
42+
logs: logger.getLogs(),
43+
debugUrl,
44+
sessionUrl,
45+
message: `Scroll position (${scrollInfo.scrollTop}px) is not halfway down the page (${halfwayScroll}px).`,
46+
};
47+
48+
return evaluationResult;
49+
} catch (error) {
50+
return {
51+
_success: false,
52+
error: error,
53+
logs: logger.getLogs(),
54+
debugUrl,
55+
sessionUrl,
56+
};
57+
} finally {
58+
await stagehand.close();
59+
}
60+
};

lib/a11y/utils.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ const PUA_END = 0xf8ff;
3030

3131
const NBSP_CHARS = new Set<number>([0x00a0, 0x202f, 0x2007, 0xfeff]);
3232

33+
const WORLD_NAME = "stagehand-world";
34+
3335
/**
3436
* Clean a string by removing private-use unicode characters, normalizing whitespace,
3537
* and trimming the result.
@@ -1045,6 +1047,8 @@ export async function resolveObjectIdForXPath(
10451047
xpath: string,
10461048
targetFrame?: Frame,
10471049
): Promise<string | null> {
1050+
const contextId = await getFrameExecutionContextId(page, targetFrame);
1051+
10481052
const { result } = await page.sendCDP<{
10491053
result?: { objectId?: string };
10501054
}>(
@@ -1063,13 +1067,42 @@ export async function resolveObjectIdForXPath(
10631067
})();
10641068
`,
10651069
returnByValue: false,
1070+
...(contextId !== undefined ? { contextId } : {}),
10661071
},
10671072
targetFrame,
10681073
);
10691074
if (!result?.objectId) throw new StagehandElementNotFoundError([xpath]);
10701075
return result.objectId;
10711076
}
10721077

1078+
/**
1079+
* Returns a stable executionContextId for the given frame by creating (or reusing)
1080+
* an isolated world in that frame.
1081+
*/
1082+
async function getFrameExecutionContextId(
1083+
stagehandPage: StagehandPage,
1084+
frame: Frame,
1085+
): Promise<number | undefined> {
1086+
if (!frame || frame === stagehandPage.page.mainFrame()) {
1087+
// Main frame (or no frame): use the default world.
1088+
return undefined;
1089+
}
1090+
const frameId: string = await getCDPFrameId(stagehandPage, frame);
1091+
const { executionContextId } = await stagehandPage.sendCDP<{
1092+
executionContextId: number;
1093+
}>(
1094+
"Page.createIsolatedWorld",
1095+
{
1096+
frameId,
1097+
worldName: WORLD_NAME,
1098+
grantUniversalAccess: true,
1099+
},
1100+
frame,
1101+
);
1102+
1103+
return executionContextId;
1104+
}
1105+
10731106
/**
10741107
* Collapse consecutive whitespace characters (spaces, tabs, newlines, carriage returns)
10751108
* into single ASCII spaces.

lib/handlers/handlerUtils/actHandlerUtils.ts

Lines changed: 66 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import { Page, Locator, FrameLocator } from "playwright";
22
import { PlaywrightCommandException } from "../../../types/playwright";
33
import { StagehandPage } from "../../StagehandPage";
4-
import { getNodeFromXpath } from "@/lib/dom/utils";
54
import { Logger } from "../../../types/log";
65
import { MethodHandlerContext } from "@/types/act";
76
import { StagehandClickError } from "@/types/stagehandErrors";
@@ -59,7 +58,7 @@ export const methodHandlerMap: Record<
5958
};
6059

6160
export async function scrollToNextChunk(ctx: MethodHandlerContext) {
62-
const { stagehandPage, xpath, logger } = ctx;
61+
const { locator, logger, xpath } = ctx;
6362

6463
logger({
6564
category: "action",
@@ -71,40 +70,45 @@ export async function scrollToNextChunk(ctx: MethodHandlerContext) {
7170
});
7271

7372
try {
74-
await stagehandPage.page.evaluate(
75-
({ xpath }) => {
76-
const elementNode = getNodeFromXpath(xpath);
77-
if (!elementNode || elementNode.nodeType !== Node.ELEMENT_NODE) {
78-
throw Error(`Could not locate element to scroll on.`);
79-
}
73+
await locator.evaluate(
74+
(element) => {
75+
const waitForScrollEnd = (el: HTMLElement | Element) =>
76+
new Promise<void>((resolve) => {
77+
let last = el.scrollTop ?? 0;
78+
const check = () => {
79+
const cur = el.scrollTop ?? 0;
80+
if (cur === last) return resolve();
81+
last = cur;
82+
requestAnimationFrame(check);
83+
};
84+
requestAnimationFrame(check);
85+
});
8086

81-
const element = elementNode as HTMLElement;
8287
const tagName = element.tagName.toLowerCase();
83-
let height: number;
8488

8589
if (tagName === "html" || tagName === "body") {
86-
height = window.visualViewport.height;
87-
window.scrollBy({
88-
top: height,
89-
left: 0,
90-
behavior: "smooth",
91-
});
90+
const height = window.visualViewport?.height ?? window.innerHeight;
9291

93-
const scrollingEl =
94-
document.scrollingElement || document.documentElement;
95-
return window.waitForElementScrollEnd(scrollingEl as HTMLElement);
96-
} else {
97-
height = element.getBoundingClientRect().height;
98-
element.scrollBy({
99-
top: height,
100-
left: 0,
101-
behavior: "smooth",
102-
});
92+
window.scrollBy({ top: height, left: 0, behavior: "smooth" });
93+
94+
const scrollingRoot = (document.scrollingElement ??
95+
document.documentElement) as HTMLElement;
10396

104-
return window.waitForElementScrollEnd(element);
97+
return waitForScrollEnd(scrollingRoot);
10598
}
99+
100+
const height = (element as HTMLElement).getBoundingClientRect().height;
101+
102+
(element as HTMLElement).scrollBy({
103+
top: height,
104+
left: 0,
105+
behavior: "smooth",
106+
});
107+
108+
return waitForScrollEnd(element);
106109
},
107-
{ xpath },
110+
undefined,
111+
{ timeout: 10_000 },
108112
);
109113
} catch (e) {
110114
logger({
@@ -122,7 +126,7 @@ export async function scrollToNextChunk(ctx: MethodHandlerContext) {
122126
}
123127

124128
export async function scrollToPreviousChunk(ctx: MethodHandlerContext) {
125-
const { stagehandPage, xpath, logger } = ctx;
129+
const { locator, logger, xpath } = ctx;
126130

127131
logger({
128132
category: "action",
@@ -134,39 +138,41 @@ export async function scrollToPreviousChunk(ctx: MethodHandlerContext) {
134138
});
135139

136140
try {
137-
await stagehandPage.page.evaluate(
138-
({ xpath }) => {
139-
const elementNode = getNodeFromXpath(xpath);
140-
if (!elementNode || elementNode.nodeType !== Node.ELEMENT_NODE) {
141-
throw Error(`Could not locate element to scroll on.`);
142-
}
141+
await locator.evaluate(
142+
(element) => {
143+
const waitForScrollEnd = (el: HTMLElement | Element) =>
144+
new Promise<void>((resolve) => {
145+
let last = el.scrollTop ?? 0;
146+
const check = () => {
147+
const cur = el.scrollTop ?? 0;
148+
if (cur === last) return resolve();
149+
last = cur;
150+
requestAnimationFrame(check);
151+
};
152+
requestAnimationFrame(check);
153+
});
143154

144-
const element = elementNode as HTMLElement;
145155
const tagName = element.tagName.toLowerCase();
146-
let height: number;
147156

148157
if (tagName === "html" || tagName === "body") {
149-
height = window.visualViewport.height;
150-
window.scrollBy({
151-
top: -height,
152-
left: 0,
153-
behavior: "smooth",
154-
});
158+
const height = window.visualViewport?.height ?? window.innerHeight;
159+
window.scrollBy({ top: -height, left: 0, behavior: "smooth" });
155160

156-
const scrollingEl =
157-
document.scrollingElement || document.documentElement;
158-
return window.waitForElementScrollEnd(scrollingEl as HTMLElement);
159-
} else {
160-
height = element.getBoundingClientRect().height;
161-
element.scrollBy({
162-
top: -height,
163-
left: 0,
164-
behavior: "smooth",
165-
});
166-
return window.waitForElementScrollEnd(element);
161+
const rootScrollingEl = (document.scrollingElement ??
162+
document.documentElement) as HTMLElement;
163+
164+
return waitForScrollEnd(rootScrollingEl);
167165
}
166+
const height = (element as HTMLElement).getBoundingClientRect().height;
167+
(element as HTMLElement).scrollBy({
168+
top: -height,
169+
left: 0,
170+
behavior: "smooth",
171+
});
172+
return waitForScrollEnd(element);
168173
},
169-
{ xpath },
174+
undefined,
175+
{ timeout: 10_000 },
170176
);
171177
} catch (e) {
172178
logger({
@@ -215,7 +221,7 @@ export async function scrollElementIntoView(ctx: MethodHandlerContext) {
215221
}
216222

217223
export async function scrollElementToPercentage(ctx: MethodHandlerContext) {
218-
const { args, stagehandPage, xpath, logger } = ctx;
224+
const { args, xpath, logger, locator } = ctx;
219225

220226
logger({
221227
category: "action",
@@ -230,20 +236,14 @@ export async function scrollElementToPercentage(ctx: MethodHandlerContext) {
230236
try {
231237
const [yArg = "0%"] = args as string[];
232238

233-
await stagehandPage.page.evaluate(
234-
({ xpath, yArg }) => {
239+
await locator.evaluate<void, { yArg: string }>(
240+
(element, { yArg }) => {
235241
function parsePercent(val: string): number {
236242
const cleaned = val.trim().replace("%", "");
237243
const num = parseFloat(cleaned);
238244
return Number.isNaN(num) ? 0 : Math.max(0, Math.min(num, 100));
239245
}
240246

241-
const elementNode = getNodeFromXpath(xpath);
242-
if (!elementNode || elementNode.nodeType !== Node.ELEMENT_NODE) {
243-
throw Error(`Could not locate element to scroll on.`);
244-
}
245-
246-
const element = elementNode as HTMLElement;
247247
const yPct = parsePercent(yArg);
248248

249249
if (element.tagName.toLowerCase() === "html") {
@@ -266,7 +266,8 @@ export async function scrollElementToPercentage(ctx: MethodHandlerContext) {
266266
});
267267
}
268268
},
269-
{ xpath, yArg },
269+
{ yArg },
270+
{ timeout: 10_000 },
270271
);
271272
} catch (e) {
272273
logger({

0 commit comments

Comments
 (0)