Skip to content

Commit 81f5080

Browse files
committed
tests: adding evals for agent tool
1 parent b4d1d16 commit 81f5080

File tree

5 files changed

+1951
-4038
lines changed

5 files changed

+1951
-4038
lines changed

evals/mcp-eval-basic.config.json

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,21 @@
121121
"browserbase_stagehand_get_url",
122122
"browserbase_session_close"
123123
]
124+
},
125+
{
126+
"name": "agent-basic-test",
127+
"description": "Test autonomous agent functionality with simple task",
128+
"steps": [
129+
{
130+
"user": "Create a browser session and use the agent to go to https://example.com and extract the page title, then close the session",
131+
"expectedState": "Example Domain"
132+
}
133+
],
134+
"expectTools": [
135+
"browserbase_session_create",
136+
"browserbase_stagehand_agent",
137+
"browserbase_session_close"
138+
]
124139
}
125140
]
126141
}

evals/mcp-eval-minimal.config.json

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,21 @@
6767
"browserbase_stagehand_get_url",
6868
"browserbase_session_close"
6969
]
70+
},
71+
{
72+
"name": "smoke-test-agent",
73+
"description": "Quick test to verify autonomous agent works",
74+
"steps": [
75+
{
76+
"user": "Create a browser session and use the agent to visit example.org and tell me the page title, then close the session",
77+
"expectedState": "Example Domain"
78+
}
79+
],
80+
"expectTools": [
81+
"browserbase_session_create",
82+
"browserbase_stagehand_agent",
83+
"browserbase_session_close"
84+
]
7085
}
7186
]
7287
}

evals/mcp-eval.config.json

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,21 @@
105105
"browserbase_session_create",
106106
"browserbase_stagehand_navigate"
107107
]
108+
},
109+
{
110+
"name": "agent-complex-task-test",
111+
"description": "Test agent with multi-step task",
112+
"steps": [
113+
{
114+
"user": "Create a browser session and use the agent to go to https://news.ycombinator.com, find the top post, and extract its title and score, then close the session",
115+
"expectedState": "points"
116+
}
117+
],
118+
"expectTools": [
119+
"browserbase_session_create",
120+
"browserbase_stagehand_agent",
121+
"browserbase_session_close"
122+
]
108123
}
109124
]
110125
}

package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,7 @@
5757
"@changesets/cli": "^2.29.6",
5858
"@eslint/js": "^9.29.0",
5959
"@smithery/cli": "^1.2.15",
60+
"@types/node": "^24.10.1",
6061
"chalk": "^5.3.0",
6162
"eslint": "^9.29.0",
6263
"eslint-plugin-react": "^7.37.5",

0 commit comments

Comments
 (0)