Skip to content

Commit e77d018

Browse files
[fix] ensure selfHeal respects arguments (#897)
# why On self-healing we were finding the selector but not remembering the arguments previously passed, so either they were empty or the LLM was hallucinating them. This PR fixes this. # what changed Updated the self-healing logic to reuse the previously passed arguments and method. # test plan - [x] Added evals to the act and regression categories (`evals/tasks/heal_*.ts`) --------- Co-authored-by: Sean McGuire <[email protected]>
1 parent b29ac55 commit e77d018

File tree

8 files changed

+223
-16
lines changed

8 files changed

+223
-16
lines changed

.changeset/chilly-geckos-burn.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": patch
3+
---
4+
5+
Fix selfHeal to remember initially received arguments

evals/evals.config.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,18 @@
386386
{
387387
"name": "nested_iframes_2",
388388
"categories": ["act"]
389+
},
390+
{
391+
"name": "heal_scroll_50",
392+
"categories": ["act"]
393+
},
394+
{
395+
"name": "heal_simple_google_search",
396+
"categories": ["regression", "act"]
397+
},
398+
{
399+
"name": "heal_custom_dropdown",
400+
"categories": ["act"]
389401
}
390402
]
391403
}

evals/initStagehand.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ const StagehandConfig = {
4848
},
4949
},
5050
},
51+
selfHeal: true,
5152
};
5253

5354
/**

evals/tasks/heal_custom_dropdown.ts

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import { EvalFunction } from "@/types/evals";
2+
3+
export const heal_custom_dropdown: EvalFunction = async ({
4+
debugUrl,
5+
sessionUrl,
6+
stagehand,
7+
logger,
8+
}) => {
9+
/**
10+
* This eval is meant to test whether we do not incorrectly attempt
11+
* the selectOptionFromDropdown method (defined in actHandlerUtils.ts) on a
12+
* 'dropdown' that is not a <select> element.
13+
*
14+
* This kind of dropdown must be clicked to be expanded before being interacted
15+
* with.
16+
*/
17+
18+
try {
19+
const page = stagehand.page;
20+
await page.goto(
21+
"https://browserbase.github.io/stagehand-eval-sites/sites/expand-dropdown/",
22+
);
23+
24+
await page.act({
25+
description: "The dropdown",
26+
selector: "/html/not-a-dropdown",
27+
arguments: [],
28+
method: "click",
29+
});
30+
31+
// we are expecting stagehand to click the dropdown to expand it,
32+
// and therefore the available options should now be contained in the full
33+
// a11y tree.
34+
35+
// to test, we'll grab the full a11y tree, and make sure it contains 'Canada'
36+
const extraction = await page.extract();
37+
const fullTree = extraction.page_text;
38+
39+
if (fullTree.includes("Canada")) {
40+
return {
41+
_success: true,
42+
debugUrl,
43+
sessionUrl,
44+
logs: logger.getLogs(),
45+
};
46+
}
47+
return {
48+
_success: false,
49+
message: "unable to expand the dropdown",
50+
debugUrl,
51+
sessionUrl,
52+
logs: logger.getLogs(),
53+
};
54+
} catch (error) {
55+
return {
56+
_success: false,
57+
message: `error attempting to select an option from the dropdown: ${error.message}`,
58+
debugUrl,
59+
sessionUrl,
60+
logs: logger.getLogs(),
61+
};
62+
} finally {
63+
await stagehand.close();
64+
}
65+
};

evals/tasks/heal_scroll_50.ts

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import { EvalFunction } from "@/types/evals";
2+
3+
export const heal_scroll_50: EvalFunction = async ({
4+
debugUrl,
5+
sessionUrl,
6+
stagehand,
7+
logger,
8+
}) => {
9+
try {
10+
await stagehand.page.goto(
11+
"https://browserbase.github.io/stagehand-eval-sites/sites/aigrant/",
12+
);
13+
await stagehand.page.act({
14+
description: "the element to scroll on",
15+
selector: "/html/body/div/div/button",
16+
arguments: ["50%"],
17+
method: "scrollTo",
18+
});
19+
20+
await new Promise((resolve) => setTimeout(resolve, 5000));
21+
22+
// Get the current scroll position and total scroll height
23+
const scrollInfo = await stagehand.page.evaluate(() => {
24+
return {
25+
scrollTop: window.scrollY + window.innerHeight / 2,
26+
scrollHeight: document.documentElement.scrollHeight,
27+
};
28+
});
29+
30+
const halfwayScroll = scrollInfo.scrollHeight / 2;
31+
const halfwayReached =
32+
Math.abs(scrollInfo.scrollTop - halfwayScroll) <= 200;
33+
const evaluationResult = halfwayReached
34+
? {
35+
_success: true,
36+
logs: logger.getLogs(),
37+
debugUrl,
38+
sessionUrl,
39+
}
40+
: {
41+
_success: false,
42+
logs: logger.getLogs(),
43+
debugUrl,
44+
sessionUrl,
45+
message: `Scroll position (${scrollInfo.scrollTop}px) is not halfway down the page (${halfwayScroll}px).`,
46+
};
47+
48+
return evaluationResult;
49+
} catch (error) {
50+
return {
51+
_success: false,
52+
error: error,
53+
logs: logger.getLogs(),
54+
debugUrl,
55+
sessionUrl,
56+
};
57+
} finally {
58+
await stagehand.close();
59+
}
60+
};
evals/tasks/heal_simple_google_search.ts

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import { EvalFunction } from "@/types/evals";
2+
3+
export const heal_simple_google_search: EvalFunction = async ({
4+
debugUrl,
5+
sessionUrl,
6+
stagehand,
7+
logger,
8+
}) => {
9+
try {
10+
await stagehand.page.goto(
11+
"https://browserbase.github.io/stagehand-eval-sites/sites/google/",
12+
);
13+
14+
await stagehand.page.act({
15+
description: "The search bar",
16+
selector: "/html/not-the-search-bar",
17+
arguments: ["OpenAI"],
18+
method: "fill",
19+
});
20+
21+
await stagehand.page.act("click the google search button");
22+
23+
const expectedUrl =
24+
"https://browserbase.github.io/stagehand-eval-sites/sites/google/openai.html";
25+
const currentUrl = stagehand.page.url();
26+
27+
return {
28+
_success: currentUrl.startsWith(expectedUrl),
29+
currentUrl,
30+
debugUrl,
31+
sessionUrl,
32+
logs: logger.getLogs(),
33+
};
34+
} catch (error) {
35+
return {
36+
_success: false,
37+
error: error,
38+
debugUrl,
39+
sessionUrl,
40+
logs: logger.getLogs(),
41+
};
42+
} finally {
43+
await stagehand.close();
44+
}
45+
};

lib/handlers/actHandler.ts

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -147,10 +147,35 @@ export class StagehandActHandler {
147147
: method
148148
? `${method} ${observe.description}`
149149
: observe.description;
150-
// Call act with the ObserveResult description
151-
return await this.stagehandPage.act({
152-
action: actCommand,
150+
const instruction = buildActObservePrompt(
151+
actCommand,
152+
Object.values(SupportedPlaywrightAction),
153+
{},
154+
);
155+
const observeResults = await this.stagehandPage.observe({
156+
instruction,
153157
});
158+
if (observeResults.length === 0) {
159+
return {
160+
success: false,
161+
message: `Failed to self heal act: No observe results found for action`,
162+
action: actCommand,
163+
};
164+
}
165+
const element: ObserveResult = observeResults[0];
166+
await this._performPlaywrightMethod(
167+
// override previously provided method and arguments
168+
observe.method,
169+
observe.arguments,
170+
// only update selector
171+
element.selector,
172+
domSettleTimeoutMs,
173+
);
174+
return {
175+
success: true,
176+
message: `Action [${element.method}] performed successfully on selector: ${element.selector}`,
177+
action: observe.description || `ObserveResult action (${method})`,
178+
};
154179
} catch (err) {
155180
this.logger({
156181
category: "action",
@@ -282,9 +307,10 @@ export class StagehandActHandler {
282307
private async _performPlaywrightMethod(
283308
method: string,
284309
args: unknown[],
285-
xpath: string,
310+
rawXPath: string,
286311
domSettleTimeoutMs?: number,
287312
) {
313+
const xpath = rawXPath.replace(/^xpath=/i, "").trim();
288314
const locator = deepLocator(this.stagehandPage.page, xpath).first();
289315
const initialUrl = this.stagehandPage.page.url();
290316

lib/handlers/handlerUtils/actHandlerUtils.ts

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,8 @@ import { StagehandClickError } from "@/types/stagehandErrors";
88

99
const IFRAME_STEP_RE = /^iframe(\[[^\]]+])?$/i;
1010

11-
export function deepLocator(
12-
root: Page | FrameLocator,
13-
rawXPath: string,
14-
): Locator {
15-
// 1 ─ strip optional 'xpath=' prefix and whitespace
16-
let xpath = rawXPath.replace(/^xpath=/i, "").trim();
11+
export function deepLocator(root: Page | FrameLocator, xpath: string): Locator {
12+
// 1 ─ prepend with slash if not already included
1713
if (!xpath.startsWith("/")) xpath = "/" + xpath;
1814

1915
// 2 ─ split into steps, accumulate until we hit an iframe step
@@ -79,8 +75,7 @@ export async function scrollToNextChunk(ctx: MethodHandlerContext) {
7975
({ xpath }) => {
8076
const elementNode = getNodeFromXpath(xpath);
8177
if (!elementNode || elementNode.nodeType !== Node.ELEMENT_NODE) {
82-
console.warn(`Could not locate element to scroll by its height.`);
83-
return Promise.resolve();
78+
throw Error(`Could not locate element to scroll on.`);
8479
}
8580

8681
const element = elementNode as HTMLElement;
@@ -143,8 +138,7 @@ export async function scrollToPreviousChunk(ctx: MethodHandlerContext) {
143138
({ xpath }) => {
144139
const elementNode = getNodeFromXpath(xpath);
145140
if (!elementNode || elementNode.nodeType !== Node.ELEMENT_NODE) {
146-
console.warn(`Could not locate element to scroll by its height.`);
147-
return Promise.resolve();
141+
throw Error(`Could not locate element to scroll on.`);
148142
}
149143

150144
const element = elementNode as HTMLElement;
@@ -246,8 +240,7 @@ export async function scrollElementToPercentage(ctx: MethodHandlerContext) {
246240

247241
const elementNode = getNodeFromXpath(xpath);
248242
if (!elementNode || elementNode.nodeType !== Node.ELEMENT_NODE) {
249-
console.warn(`Could not locate element to scroll on.`);
250-
return;
243+
throw Error(`Could not locate element to scroll on.`);
251244
}
252245

253246
const element = elementNode as HTMLElement;

0 commit comments

Comments
 (0)