Skip to content

Commit 49cad24

Browse files
committed
adding evals to mcp server, auto publish to npm, ci evals
1 parent 769b881 commit 49cad24

File tree

10 files changed, with 2,314 additions and 3,040 deletions.

.github/workflows/ci.yml

Lines changed: 101 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,115 @@
# CI/CD pipeline.
#
# `test` runs on every pull request and push targeting main: lint, format
# check, build, then the evaluation tests.
# `publish` runs only for pushes to main after `test` succeeds: it bumps the
# minor version, publishes to npm, and pushes the bump commit and tag back.
name: CI/CD Pipeline

on:
  pull_request:
    branches: [main]
  push:
    branches: [main]

jobs:
  test:
    name: Test and Lint
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # pnpm has to be installed before setup-node: with `cache: 'pnpm'`,
      # setup-node calls the pnpm binary to locate the store directory and
      # fails when pnpm is not yet on PATH.
      - name: Install pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 8

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'
          cache: 'pnpm'

      - name: Install dependencies
        run: pnpm install --frozen-lockfile

      - name: Run linting
        run: pnpm lint

      - name: Check formatting
        run: pnpm prettier:check

      - name: Build project
        run: pnpm build

      - name: Run evaluation tests
        env:
          BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
          BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          # The eval configs under evals/ forward ${GEMINI_API_KEY} to the
          # server process, so it is exported here as well.
          # NOTE(review): assumes a GEMINI_API_KEY repository secret exists;
          # confirm before merging.
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
        run: |
          pnpm test

  publish:
    name: Publish to NPM
    runs-on: ubuntu-latest
    needs: test
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    # This job pushes a commit and a tag using GITHUB_TOKEN, which requires
    # write access to repository contents. Declaring it explicitly keeps the
    # push working when the default token permission is read-only.
    permissions:
      contents: write

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

      # Same ordering constraint as in the test job: pnpm first, then
      # setup-node with the pnpm cache enabled.
      - name: Install pnpm
        uses: pnpm/action-setup@v4
        with:
          version: 8

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '18'
          cache: 'pnpm'
          registry-url: 'https://registry.npmjs.org'

      - name: Install dependencies
        run: pnpm install --frozen-lockfile

      - name: Configure git
        run: |
          git config --global user.name 'github-actions[bot]'
          git config --global user.email 'github-actions[bot]@users.noreply.github.com'

      - name: Bump version
        id: version
        run: |
          # Get current version from package.json
          CURRENT_VERSION=$(node -p "require('./package.json').version")
          echo "current_version=$CURRENT_VERSION" >> "$GITHUB_OUTPUT"

          # Bump minor version
          npm version minor --no-git-tag-version

          # Get new version
          NEW_VERSION=$(node -p "require('./package.json').version")
          echo "new_version=$NEW_VERSION" >> "$GITHUB_OUTPUT"

          # Commit version bump; [skip ci] keeps this commit from
          # re-triggering the pipeline.
          git add package.json pnpm-lock.yaml
          git commit -m "chore: bump version to $NEW_VERSION [skip ci]"

      - name: Build for production
        run: pnpm build

      - name: Publish to NPM
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
        run: pnpm publish --no-git-checks

      - name: Create git tag
        run: |
          git tag "v${{ steps.version.outputs.new_version }}"

      - name: Push changes and tags
        run: |
          git push origin main
          git push origin "v${{ steps.version.outputs.new_version }}"

.husky/pre-commit

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
#!/bin/sh
2+
pnpm install
3+
pnpm build
24
pnpm pre-commit

evals/mcp-eval-basic.config.json

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
{
2+
"server": {
3+
"transport": "stdio",
4+
"command": "node",
5+
"args": ["./cli.js"],
6+
"env": {
7+
"BROWSERBASE_API_KEY": "${BROWSERBASE_API_KEY}",
8+
"BROWSERBASE_PROJECT_ID": "${BROWSERBASE_PROJECT_ID}",
9+
"GEMINI_API_KEY": "${GEMINI_API_KEY}"
10+
}
11+
},
12+
"timeout": 180000,
13+
"llmJudge": false,
14+
"workflows": [
15+
{
16+
"name": "basic-navigation-test",
17+
"description": "Test basic browser navigation functionality",
18+
"steps": [
19+
{
20+
"user": "Create a browser session, navigate to https://example.com, and close the session",
21+
"expectedState": "closed"
22+
}
23+
],
24+
"expectTools": [
25+
"browserbase_session_create",
26+
"browserbase_stagehand_navigate",
27+
"browserbase_session_close"
28+
]
29+
},
30+
{
31+
"name": "search-and-extract-test",
32+
"description": "Test navigation and data extraction",
33+
"steps": [
34+
{
35+
"user": "Create a browser session, navigate to https://example.com, extract the page title, and close the session",
36+
"expectedState": "Example Domain"
37+
}
38+
],
39+
"expectTools": [
40+
"browserbase_session_create",
41+
"browserbase_stagehand_navigate",
42+
"browserbase_stagehand_extract",
43+
"browserbase_session_close"
44+
]
45+
},
46+
{
47+
"name": "observe-and-interact-test",
48+
"description": "Test element observation and interaction capabilities",
49+
"steps": [
50+
{
51+
"user": "Create a browser session, navigate to https://example.com, observe the page elements, and close the session",
52+
"expectedState": "closed"
53+
}
54+
],
55+
"expectTools": [
56+
"browserbase_session_create",
57+
"browserbase_stagehand_navigate",
58+
"browserbase_stagehand_observe",
59+
"browserbase_session_close"
60+
]
61+
},
62+
{
63+
"name": "screenshot-test",
64+
"description": "Test screenshot functionality",
65+
"steps": [
66+
{
67+
"user": "Create a browser session, navigate to https://example.com, take a screenshot, and close the session",
68+
"expectedState": "closed"
69+
}
70+
],
71+
"expectTools": [
72+
"browserbase_session_create",
73+
"browserbase_stagehand_navigate",
74+
"browserbase_screenshot",
75+
"browserbase_session_close"
76+
]
77+
},
78+
{
79+
"name": "multi-session-test",
80+
"description": "Test multi-session browser management",
81+
"steps": [
82+
{
83+
"user": "Create a multi-session browser named 'test-session', list all sessions, navigate to https://example.com in that session, and close the session",
84+
"expectedState": "closed"
85+
}
86+
],
87+
"expectTools": [
88+
"multi_browserbase_stagehand_session_create",
89+
"multi_browserbase_stagehand_session_list",
90+
"multi_browserbase_stagehand_navigate_session",
91+
"multi_browserbase_stagehand_session_close"
92+
]
93+
},
94+
{
95+
"name": "form-interaction-test",
96+
"description": "Test form filling and submission capabilities",
97+
"steps": [
98+
{
99+
"user": "Create a browser session, navigate to https://httpbin.org/forms/post, fill in the customer name field with 'TestUser', and close the session",
100+
"expectedState": "closed"
101+
}
102+
],
103+
"expectTools": [
104+
"browserbase_session_create",
105+
"browserbase_stagehand_navigate",
106+
"browserbase_stagehand_act",
107+
"browserbase_session_close"
108+
]
109+
},
110+
{
111+
"name": "error-handling-test",
112+
"description": "Test error handling for invalid operations",
113+
"steps": [
114+
{
115+
"user": "Create a browser session and try to navigate to an invalid URL like 'invalid-url-test'",
116+
"expectedState": "error"
117+
}
118+
],
119+
"expectTools": [
120+
"browserbase_session_create",
121+
"browserbase_stagehand_navigate"
122+
]
123+
},
124+
{
125+
"name": "url-retrieval-test",
126+
"description": "Test URL retrieval functionality",
127+
"steps": [
128+
{
129+
"user": "Create a browser session, navigate to https://example.com, get the current URL to verify navigation, and close the session",
130+
"expectedState": "https://example.com"
131+
}
132+
],
133+
"expectTools": [
134+
"browserbase_session_create",
135+
"browserbase_stagehand_navigate",
136+
"browserbase_stagehand_get_url",
137+
"browserbase_session_close"
138+
]
139+
}
140+
]
141+
}

evals/mcp-eval-minimal.config.json

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
{
2+
"server": {
3+
"transport": "stdio",
4+
"command": "node",
5+
"args": ["./cli.js"],
6+
"env": {
7+
"BROWSERBASE_API_KEY": "${BROWSERBASE_API_KEY}",
8+
"BROWSERBASE_PROJECT_ID": "${BROWSERBASE_PROJECT_ID}",
9+
"GEMINI_API_KEY": "${GEMINI_API_KEY}"
10+
}
11+
},
12+
"timeout": 60000,
13+
"llmJudge": false,
14+
"workflows": [
15+
{
16+
"name": "smoke-test-navigation",
17+
"description": "Quick test to verify basic navigation works",
18+
"steps": [
19+
{
20+
"user": "Open a browser and go to example.org",
21+
"expectedState": "session created"
22+
},
23+
{
24+
"user": "Close the browser",
25+
"expectedState": "session closed"
26+
}
27+
],
28+
"expectTools": [
29+
"browserbase_session_create",
30+
"browserbase_stagehand_navigate",
31+
"browserbase_session_close"
32+
]
33+
},
34+
{
35+
"name": "smoke-test-extraction",
36+
"description": "Quick test to verify data extraction works",
37+
"steps": [
38+
{
39+
"user": "Navigate to example.org and extract the page title",
40+
"expectedState": "Example Domain"
41+
},
42+
{
43+
"user": "Close the session",
44+
"expectedState": "session closed"
45+
}
46+
],
47+
"expectTools": [
48+
"browserbase_session_create",
49+
"browserbase_stagehand_navigate",
50+
"browserbase_stagehand_extract",
51+
"browserbase_session_close"
52+
]
53+
},
54+
{
55+
"name": "smoke-test-multi-session",
56+
"description": "Quick test to verify multi-session functionality",
57+
"steps": [
58+
{
59+
"user": "Create a browser session named 'test-session'",
60+
"expectedState": "Created session"
61+
},
62+
{
63+
"user": "List active sessions",
64+
"expectedState": "test-session"
65+
},
66+
{
67+
"user": "Close the test session",
68+
"expectedState": "closed session"
69+
}
70+
],
71+
"expectTools": [
72+
"multi_browserbase_stagehand_session_create",
73+
"multi_browserbase_stagehand_session_list",
74+
"multi_browserbase_stagehand_session_close"
75+
]
76+
},
77+
{
78+
"name": "smoke-test-url-tools",
79+
"description": "Quick test to verify URL retrieval tools work",
80+
"steps": [
81+
{
82+
"user": "Create a browser session, navigate to example.org, get the current URL, and close the session",
83+
"expectedState": "example.org"
84+
}
85+
],
86+
"expectTools": [
87+
"browserbase_session_create",
88+
"browserbase_stagehand_navigate",
89+
"browserbase_stagehand_get_url",
90+
"browserbase_session_close"
91+
]
92+
}
93+
]
94+
}

0 commit comments

Comments
 (0)