Skip to content

Commit 81f0e1a

Browse files
committed
offload initialization of agent to the runner
1 parent fffe986 commit 81f0e1a

28 files changed

+44 -177 lines changed

evals/initStagehand.ts

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ import {
1818
} from "@browserbasehq/stagehand";
1919
import { EvalLogger } from "./logger";
2020
import type { StagehandInitResult } from "@/types/evals";
21+
import type { AgentConfig } from "@/dist";
2122
import { AvailableModel } from "@browserbasehq/stagehand";
2223

2324
/**
@@ -104,12 +105,26 @@ export const initStagehand = async ({
104105
// Set navigation timeout to 60 seconds for evaluations
105106
stagehand.context.setDefaultNavigationTimeout(60_000);
106107

108+
const isCUAModel = (model: string): boolean =>
109+
model.includes("computer-use-preview") || model.startsWith("claude");
110+
111+
let agentConfig: AgentConfig | undefined;
112+
if (isCUAModel(modelName)) {
113+
agentConfig = {
114+
model: modelName,
115+
provider: modelName.startsWith("claude") ? "anthropic" : "openai",
116+
} as AgentConfig;
117+
}
118+
119+
const agent = stagehand.agent(agentConfig);
120+
107121
return {
108122
stagehand,
109123
stagehandConfig: config,
110124
logger,
111125
debugUrl,
112126
sessionUrl,
113127
modelName,
128+
agent,
114129
};
115130
};

evals/tasks/agent/all_recipes.ts

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -5,17 +5,11 @@ export const all_recipes: EvalFunction = async ({
55
sessionUrl,
66
stagehand,
77
logger,
8-
modelName,
8+
agent,
99
}) => {
1010
try {
1111
await stagehand.page.goto("https://www.allrecipes.com/");
1212

13-
const agent = stagehand.agent({
14-
model: modelName,
15-
provider: modelName.startsWith("claude") ? "anthropic" : "openai",
16-
instructions: `You are a helpful assistant that can help me with my tasks. You are given a task and you need to complete it without asking follow up questions. The current page is ${await stagehand.page.title()}`,
17-
});
18-
1913
const agentResult = await agent.execute({
2014
instruction:
2115
"Search for a recipe for Beef Wellington on Allrecipes that has at least 200 reviews and an average rating of 4.5 stars or higher. List the main ingredients required for the dish.",

evals/tasks/agent/amazon_shoes.ts

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,10 @@ export const amazon_shoes: EvalFunction = async ({
55
sessionUrl,
66
stagehand,
77
logger,
8-
modelName,
8+
agent,
99
}) => {
1010
try {
1111
await stagehand.page.goto("https://www.amazon.com/");
12-
const agent = stagehand.agent({
13-
model: modelName,
14-
provider: modelName.startsWith("claude") ? "anthropic" : "openai",
15-
instructions: `You are a helpful web automation assistant. DON'T ASK FOLLOW UP QUESTIONS UNTIL YOU HAVE FULFILLED THE USER'S REQUEST. Today is ${new Date().toLocaleDateString()}.`,
16-
});
1712

1813
const agentResult = await agent.execute({
1914
instruction:

evals/tasks/agent/apple_trade_in.ts

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,10 @@ export const apple_trade_in: EvalFunction = async ({
55
sessionUrl,
66
stagehand,
77
logger,
8-
modelName,
8+
agent,
99
}) => {
1010
try {
1111
await stagehand.page.goto("https://www.apple.com/");
12-
const agent = stagehand.agent({
13-
model: modelName,
14-
provider: modelName.startsWith("claude") ? "anthropic" : "openai",
15-
instructions: `You are a helpful web automation assistant. DON'T ASK FOLLOW UP QUESTIONS UNTIL YOU HAVE FULFILLED THE USER'S REQUEST. Today is ${new Date().toLocaleDateString()}.`,
16-
});
1712

1813
const agentResult = await agent.execute({
1914
instruction:

evals/tasks/agent/apple_tv.ts

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,10 @@ export const apple_tv: EvalFunction = async ({
55
sessionUrl,
66
stagehand,
77
logger,
8-
modelName,
8+
agent,
99
}) => {
1010
try {
1111
await stagehand.page.goto("https://www.apple.com/");
12-
const agent = stagehand.agent({
13-
model: modelName,
14-
provider: modelName.startsWith("claude") ? "anthropic" : "openai",
15-
instructions: `You are a helpful web automation assistant. DON'T ASK FOLLOW UP QUESTIONS UNTIL YOU HAVE FULFILLED THE USER'S REQUEST. Today is ${new Date().toLocaleDateString()}.`,
16-
});
1712

1813
const agentResult = await agent.execute({
1914
instruction:

evals/tasks/agent/arxiv_gpt_report.ts

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,10 @@ export const arxiv_gpt_report: EvalFunction = async ({
55
sessionUrl,
66
stagehand,
77
logger,
8-
modelName,
8+
agent,
99
}) => {
1010
try {
1111
await stagehand.page.goto("https://arxiv.org/");
12-
const agent = stagehand.agent({
13-
model: modelName,
14-
provider: modelName.startsWith("claude") ? "anthropic" : "openai",
15-
instructions: `You are a helpful web automation assistant. DON'T ASK FOLLOW UP QUESTIONS UNTIL YOU HAVE FULFILLED THE USER'S REQUEST. Today is ${new Date().toLocaleDateString()}.`,
16-
});
1712

1813
const agentResult = await agent.execute({
1914
instruction:

evals/tasks/agent/github.ts

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -5,17 +5,11 @@ export const github: EvalFunction = async ({
55
sessionUrl,
66
stagehand,
77
logger,
8-
modelName,
8+
agent,
99
}) => {
1010
try {
1111
await stagehand.page.goto("https://github.com/");
1212

13-
const agent = stagehand.agent({
14-
model: modelName,
15-
provider: modelName.startsWith("claude") ? "anthropic" : "openai",
16-
instructions: `You are a helpful assistant that can help me with my tasks. You are given a task and you need to complete it without asking follow up questions. The current page is ${await stagehand.page.title()}`,
17-
});
18-
1913
const agentResult = await agent.execute({
2014
instruction:
2115
"Find a Ruby repository on GitHub that has been updated in the past 3 days and has at least 1000 stars.",

evals/tasks/agent/github_react_version.ts

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,10 @@ export const github_react_version: EvalFunction = async ({
55
sessionUrl,
66
stagehand,
77
logger,
8-
modelName,
8+
agent,
99
}) => {
1010
try {
1111
await stagehand.page.goto("https://github.com/");
12-
const agent = stagehand.agent({
13-
model: modelName,
14-
provider: modelName.startsWith("claude") ? "anthropic" : "openai",
15-
instructions: `You are a helpful web automation assistant. DON'T ASK FOLLOW UP QUESTIONS UNTIL YOU HAVE FULFILLED THE USER'S REQUEST. Today is ${new Date().toLocaleDateString()}.`,
16-
});
1712

1813
const agentResult = await agent.execute({
1914
instruction:

evals/tasks/agent/google_flights.ts

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -6,17 +6,11 @@ export const google_flights: EvalFunction = async ({
66
sessionUrl,
77
stagehand,
88
logger,
9-
modelName,
9+
agent,
1010
}) => {
1111
try {
1212
await stagehand.page.goto("https://google.com/travel/flights");
1313

14-
const agent = stagehand.agent({
15-
model: modelName,
16-
provider: modelName.startsWith("claude") ? "anthropic" : "openai",
17-
instructions: `You are a helpful assistant that can help me with my tasks. You are given a task and you need to complete it without asking follow up questions. Today is ${new Date().toISOString().slice(0, 10)}. The current page is ${await stagehand.page.title()}`,
18-
});
19-
2014
const agentResult = await agent.execute({
2115
instruction:
2216
"Search for flights from San Francisco to New York for next weekend",

evals/tasks/agent/google_maps.ts

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -6,17 +6,11 @@ export const google_maps: EvalFunction = async ({
66
sessionUrl,
77
stagehand,
88
logger,
9-
modelName,
9+
agent,
1010
}) => {
1111
try {
1212
await stagehand.page.goto("https://maps.google.com");
1313

14-
const agent = stagehand.agent({
15-
model: modelName,
16-
provider: modelName.startsWith("claude") ? "anthropic" : "openai",
17-
instructions: `You are a helpful assistant that can help me with my tasks. You are given a task and you need to complete it without asking follow up questions. The current page is ${await stagehand.page.title()}`,
18-
});
19-
2014
const agentResult = await agent.execute({
2115
instruction:
2216
"How long does it take to get from San Francisco to New York driving?",

0 commit comments

Comments
 (0)