Skip to content

Commit 42f3b20

Browse files
committed
feat: adding stagehand agent tool
1 parent f841909 commit 42f3b20

File tree

2 files changed

+94
-0
lines changed

2 files changed

+94
-0
lines changed

src/tools/agent.ts

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import { z } from "zod";
2+
import type { Tool, ToolSchema, ToolResult } from "./tool.js";
3+
import type { Context } from "../context.js";
4+
import type { ToolActionResult } from "../types/types.js";
5+
6+
/**
7+
* Stagehand Agent
8+
* Docs: https://docs.stagehand.dev/basics/agent
9+
*
10+
* This tool uses Gemini Computer Use to autonomously complete web-based tasks.
11+
* The agent will navigate, interact, and complete the task described in the prompt.
12+
*/
13+
14+
/**
 * Description attached to the `prompt` field of the agent tool's input.
 * Kept as a standalone constant so the schema definition below stays short.
 */
const agentPromptDescription = `The task prompt describing what you want the agent to accomplish.
Be clear and specific about the goal. For example:
'Go to Hacker News and find the most controversial post from today, then summarize the top 3 comments'.
The agent will autonomously navigate and interact with web pages to complete this task.`;

/**
 * Input accepted by the agent tool: an object with a single string field,
 * `prompt`, carrying the task the agent should carry out.
 */
const AgentInputSchema = z.object({
  prompt: z.string().describe(agentPromptDescription),
});
22+
23+
/** Parsed (output) shape of {@link AgentInputSchema}: `{ prompt: string }`. */
type AgentInput = z.output<typeof AgentInputSchema>;
24+
25+
/**
 * Tool metadata for the Stagehand agent: the tool's name, its description,
 * and the Zod schema its input is declared with.
 */
const agentSchema: ToolSchema<typeof AgentInputSchema> = {
  name: "browserbase_stagehand_agent",
  description:
    "Execute a task autonomously using Gemini Computer Use agent. " +
    "The agent will navigate and interact with web pages to complete the given task.",
  inputSchema: AgentInputSchema,
};
30+
31+
/** Model identifier passed to `stagehand.agent` for computer-use runs. */
const AGENT_MODEL_NAME = "google/gemini-2.5-computer-use-preview-10-2025";

/** Value passed as `maxSteps` to `agent.execute` for every run. */
const AGENT_MAX_STEPS = 20;

/**
 * Reads the Gemini API key from the environment.
 *
 * Candidates are checked in order: `GOOGLE_GENERATIVE_AI_API_KEY`,
 * `GOOGLE_API_KEY`, `GEMINI_API_KEY`. `||` (rather than `??`) is deliberate
 * here: a variable that is set but empty must fall through to the next one.
 *
 * @returns The first non-empty candidate; if none is non-empty, whatever
 *   `GEMINI_API_KEY` holds (possibly `undefined`).
 */
function resolveGeminiApiKey(): string | undefined {
  return (
    process.env.GOOGLE_GENERATIVE_AI_API_KEY ||
    process.env.GOOGLE_API_KEY ||
    process.env.GEMINI_API_KEY
  );
}

/**
 * Renders the value returned by `agent.execute` as the tool's text output.
 *
 * The result shape is not relied upon: `result` is taken from the object when
 * present, otherwise the whole value is reported; `steps` falls back to
 * `trace`, then to an empty list.
 *
 * @param result - Raw value returned by `agent.execute`.
 * @returns The text `Agent execution completed:` followed by pretty-printed JSON.
 */
function formatAgentResponse(result: unknown): string {
  // Narrow at runtime instead of asserting, so a null/undefined/primitive
  // result is reported as-is rather than failing on property access.
  const data: Record<string, unknown> =
    typeof result === "object" && result !== null
      ? (result as Record<string, unknown>)
      : {};

  const response = {
    // `??` keeps legitimate falsy results (0, false, "") instead of replacing
    // them with the whole raw object.
    result: data.result ?? result,
    steps: data.steps ?? data.trace ?? [],
  };

  return `Agent execution completed:\n${JSON.stringify(response, null, 2)}`;
}

/**
 * Builds the deferred action that runs a Stagehand computer-use agent on the
 * caller's prompt.
 *
 * Nothing is executed until the returned `action` is invoked. The action
 * obtains Stagehand from the context, creates an agent configured with
 * {@link AGENT_MODEL_NAME}, executes `params.prompt` with
 * {@link AGENT_MAX_STEPS} as `maxSteps`, and returns the formatted outcome as
 * a single text content item.
 *
 * @param context - Source of the Stagehand instance (`context.getStagehand()`).
 * @param params - Validated tool input; only `prompt` is read.
 * @returns A tool result holding the `action` and `waitForNetwork: false`.
 * @throws The returned `action` rejects with
 *   `Error("Failed to execute agent task: …")` when obtaining Stagehand,
 *   creating the agent, executing, or formatting fails.
 */
async function handleAgent(
  context: Context,
  params: AgentInput,
): Promise<ToolResult> {
  const action = async (): Promise<ToolActionResult> => {
    try {
      const stagehand = await context.getStagehand();

      const agent = stagehand.agent({
        cua: true,
        model: {
          modelName: AGENT_MODEL_NAME,
          // Resolved at call time so the environment is read per run.
          apiKey: resolveGeminiApiKey(),
        },
      });

      const result = await agent.execute({
        instruction: params.prompt,
        maxSteps: AGENT_MAX_STEPS,
      });

      return {
        content: [
          {
            type: "text",
            text: formatAgentResponse(result),
          },
        ],
      };
    } catch (error) {
      const errorMsg = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to execute agent task: ${errorMsg}`);
    }
  };

  return {
    action,
    waitForNetwork: false,
  };
}
84+
85+
/**
 * The Stagehand agent tool: pairs {@link agentSchema} with its handler,
 * {@link handleAgent}, and tags the tool with the `"core"` capability.
 * Exported as this module's default export.
 */
const agentTool: Tool<typeof AgentInputSchema> = {
  capability: "core",
  schema: agentSchema,
  handle: handleAgent,
};

export default agentTool;

src/tools/index.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import observeTool from "./observe.js";
55
import screenshotTool from "./screenshot.js";
66
import sessionTools from "./session.js";
77
import getUrlTool from "./url.js";
8+
import agentTool from "./agent.js";
89

910
// Export individual tools
1011
export { default as navigateTool } from "./navigate.js";
@@ -14,6 +15,7 @@ export { default as observeTool } from "./observe.js";
1415
export { default as screenshotTool } from "./screenshot.js";
1516
export { default as sessionTools } from "./session.js";
1617
export { default as getUrlTool } from "./url.js";
18+
export { default as agentTool } from "./agent.js";
1719

1820
// Export all tools as array
1921
export const TOOLS = [
@@ -24,6 +26,7 @@ export const TOOLS = [
2426
observeTool,
2527
screenshotTool,
2628
getUrlTool,
29+
agentTool,
2730
];
2831

2932
export const sessionManagementTools = sessionTools;

0 commit comments

Comments
 (0)