Skip to content

Commit f1d7dec

Browse files
committed
change to structured extract, better prompts for tools
1 parent 352b9c2 commit f1d7dec

File tree

4 files changed

+65
-36
lines changed

4 files changed

+65
-36
lines changed

src/sessionManager.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import { AvailableModel, Stagehand } from "@browserbasehq/stagehand";
66
import type { Config } from "../config.js";
77
import type { Cookie } from "playwright-core";
88

9-
// Define the type for a session object
109
export type BrowserSession = {
1110
browser: Browser;
1211
page: Page;

src/tools/extract.ts

Lines changed: 38 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,37 @@ import type { Context } from "../context.js";
44
import type { ToolActionResult } from "../context.js";
55

66
const ExtractInputSchema = z.object({
7-
random_string: z.string().optional().describe("Dummy parameter for no-parameter tools"),
7+
instruction: z.string().describe(
8+
"The specific instruction for what information to extract from the current page. " +
9+
"Be as detailed and specific as possible about what you want to extract. For example: " +
10+
"'Extract all product names and prices from the listing page' or 'Get the article title, " +
11+
"author, and publication date from this blog post'. The more specific your instruction, " +
12+
"the better the extraction results will be. Avoid vague instructions like 'get everything' " +
13+
"or 'extract the data'. Instead, be explicit about the exact elements, text, or information you need."
14+
),
15+
schema: z.string().describe(
16+
"A JSON schema string that defines the exact structure and format of the data you want to extract. " +
17+
"This schema should be a valid JSON string that describes the expected output format. For example: " +
18+
"'{\"type\": \"object\", \"properties\": {\"title\": {\"type\": \"string\"}, \"price\": {\"type\": \"number\"}}}' " +
19+
"or '{\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, " +
20+
"\"description\": {\"type\": \"string\"}}}}'. The schema helps ensure the extracted data is properly " +
21+
"formatted and structured. If the schema is invalid JSON, extraction will proceed without schema validation. " +
22+
"Use this to specify exactly how you want the extracted information organized and typed."
23+
),
824
});
925

1026
type ExtractInput = z.infer<typeof ExtractInputSchema>;
1127

1228
const extractSchema: ToolSchema<typeof ExtractInputSchema> = {
1329
name: "stagehand_extract",
14-
description: "Extracts all of the text from the current page.",
30+
description:
31+
"Extracts structured information and text content from the current web page based on specific instructions " +
32+
"and a defined schema. This tool is ideal for scraping data, gathering information, or pulling specific " +
33+
"content from web pages. Use this tool when you need to get text content, data, or information from a page " +
34+
"rather than interacting with elements. For interactive elements like buttons, forms, or clickable items, " +
35+
"use the observe tool instead. The extraction works best when you provide clear, specific instructions " +
36+
"about what to extract and a well-defined JSON schema for the expected output format. This ensures " +
37+
"the extracted data is properly structured and usable.",
1538
inputSchema: ExtractInputSchema,
1639
};
1740

@@ -21,40 +44,25 @@ async function handleExtract(
2144
): Promise<ToolResult> {
2245
const action = async (): Promise<ToolActionResult> => {
2346
try {
24-
const page = await context.getActivePage();
25-
if (!page) {
26-
throw new Error("No active page available");
27-
}
47+
const stagehand = await context.getStagehand();
48+
49+
let parsedSchema = null;
50+
try {
51+
parsedSchema = JSON.parse(params.schema);
52+
} catch (error) {
53+
throw new Error(`Invalid schema format: ${error instanceof Error ? error.message : 'Unknown error'}`);
54+
}
2855

29-
const bodyText = await page.evaluate(() => document.body.innerText);
30-
31-
const content = bodyText
32-
.split("\n")
33-
.map((line) => line.trim())
34-
.filter((line) => {
35-
if (!line) return false;
36-
37-
if (
38-
(line.includes("{") && line.includes("}")) ||
39-
line.includes("@keyframes") || // Remove CSS animations
40-
line.match(/^\.[a-zA-Z0-9_-]+\s*{/) || // Remove CSS lines starting with .className {
41-
line.match(/^[a-zA-Z-]+:[a-zA-Z0-9%\s\(\)\.,-]+;$/) // Remove lines like "color: blue;" or "margin: 10px;"
42-
) {
43-
return false;
44-
}
45-
return true;
46-
})
47-
.map((line) => {
48-
return line.replace(/\\u([0-9a-fA-F]{4})/g, (_, hex) =>
49-
String.fromCharCode(parseInt(hex, 16))
50-
);
51-
});
56+
const extraction = await stagehand.page.extract({
57+
instruction: params.instruction,
58+
schema: parsedSchema // Invalid schema JSON throws above, so this is always the JSON.parse result
59+
});
5260

5361
return {
5462
content: [
5563
{
5664
type: "text",
57-
text: `Extracted content:\n${content.join("\n")}`,
65+
text: `Extracted content:\n${extraction.join("\n")}`,
5866
},
5967
],
6068
};

src/tools/observe.ts

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,18 @@ import type { ToolActionResult } from "../context.js";
55

66
const ObserveInputSchema = z.object({
77
instruction: z.string().describe(
8-
"Instruction for observation (e.g., 'find the login button'). This instruction must be extremely specific."
8+
"Detailed instruction for what specific elements or components to observe on the web page. " +
9+
"This instruction must be extremely specific and descriptive. For example: 'Find the red login button " +
10+
"in the top right corner', 'Locate the search input field with placeholder text', or 'Identify all " +
11+
"clickable product cards on the page'. The more specific and detailed your instruction, the better " +
12+
"the observation results will be. Avoid generic instructions like 'find buttons' or 'see elements'. " +
13+
"Instead, describe the visual characteristics, location, text content, or functionality of the elements " +
14+
"you want to observe. This tool is designed to help you identify interactive elements that you can " +
15+
"later use with the act tool for performing actions like clicking, typing, or form submission."
16+
),
17+
returnAction: z.boolean().optional().describe(
18+
"Whether to return the action to perform on the element. If true, the action will be returned as a string. " +
19+
"If false, the action will not be returned."
920
),
1021
});
1122

@@ -14,7 +25,14 @@ type ObserveInput = z.infer<typeof ObserveInputSchema>;
1425
const observeSchema: ToolSchema<typeof ObserveInputSchema> = {
1526
name: "stagehand_observe",
1627
description:
17-
"Observes elements on the web page. Use this tool to observe elements that you can later use in an action. Use observe instead of extract when dealing with actionable (interactable) elements rather than text. More often than not, you'll want to use extract instead of observe when dealing with scraping or extracting structured text.",
28+
"Observes and identifies specific interactive elements on the current web page that can be used for subsequent actions. " +
29+
"This tool is specifically designed for finding actionable (interactable) elements such as buttons, links, form fields, " +
30+
"dropdowns, checkboxes, and other UI components that you can interact with. Use this tool when you need to locate " +
31+
"elements before performing actions with the act tool. DO NOT use this tool for extracting text content or data - " +
32+
"use the extract tool instead for that purpose. The observe tool returns detailed information about the identified " +
33+
"elements including their properties, location, and interaction capabilities. This information can then be used " +
34+
"to craft precise actions. The more specific your observation instruction, the more accurate the element identification " +
35+
"will be. Think of this as your 'eyes' on the page to find exactly what you need to interact with.",
1836
inputSchema: ObserveInputSchema,
1937
};
2038

@@ -28,7 +46,7 @@ async function handleObserve(
2846

2947
const observations = await stagehand.page.observe({
3048
instruction: params.instruction,
31-
returnAction: false,
49+
returnAction: params.returnAction,
3250
});
3351

3452
return {

src/tools/screenshot.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import type { ToolActionResult } from "../context.js";
55
import { screenshots } from "../resources.js";
66

77
const ScreenshotInputSchema = z.object({
8-
random_string: z.string().optional().describe("Dummy parameter for no-parameter tools"),
8+
name: z.string().optional().describe("The name of the screenshot"),
99
});
1010

1111
type ScreenshotInput = z.infer<typeof ScreenshotInputSchema>;
@@ -34,7 +34,11 @@ async function handleScreenshot(
3434

3535
// Convert buffer to base64 string and store in memory
3636
const screenshotBase64 = screenshotBuffer.toString("base64");
37-
const name = `screenshot-${new Date()
37+
const name =
38+
`screenshot-${params.name}-${new Date()
39+
.toISOString()
40+
.replace(/:/g, "-")}` ||
41+
`screenshot-${new Date()
3842
.toISOString()
3943
.replace(/:/g, "-")}`;
4044
screenshots.set(name, screenshotBase64);

0 commit comments

Comments
 (0)