Skip to content

Commit 0284d18

Browse files
authored
chore: support an html route in evals (ChromeDevTools#770)
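Allows eval scenarios to use the test server's URL in the prompt: a scenario can declare an HTML route, and the `<TEST_URL>` placeholder in its prompt is replaced with that route's URL.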
1 parent 257b994 commit 0284d18

File tree

3 files changed

+63
-6
lines changed

3 files changed

+63
-6
lines changed

scripts/eval_gemini.ts

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import fs from 'node:fs';
88
import path from 'node:path';
9-
import {describe, test} from 'node:test';
9+
import {describe, test, before, after, afterEach} from 'node:test';
1010

1111
import {
1212
GoogleGenerativeAI,
@@ -18,6 +18,7 @@ import {StdioClientTransport} from '@modelcontextprotocol/sdk/client/stdio.js';
1818

1919
const ROOT_DIR = path.resolve(import.meta.dirname, '..');
2020
const SCENARIOS_DIR = path.join(import.meta.dirname, 'eval_scenarios');
21+
import {TestServer} from '../build/tests/server.js';
2122

2223
// Define schema for our test scenarios
2324
export interface CapturedFunctionCall {
@@ -29,6 +30,10 @@ export interface TestScenario {
2930
prompt: string;
3031
maxTurns: number;
3132
expectations: (calls: CapturedFunctionCall[]) => void;
33+
htmlRoute?: {
34+
path: string;
35+
htmlContent: string;
36+
};
3237
}
3338

3439
async function loadScenario(scenarioPath: string): Promise<TestScenario> {
@@ -78,6 +83,7 @@ const cleanSchemaRecursive = (schema: unknown): unknown => {
7883
async function runSingleScenario(
7984
scenarioPath: string,
8085
apiKey: string,
86+
server: TestServer,
8187
): Promise<void> {
8288
const absolutePath = path.resolve(scenarioPath);
8389
console.log(`\n### Running Scenario: ${absolutePath} ###`);
@@ -88,6 +94,17 @@ async function runSingleScenario(
8894
try {
8995
const scenario = await loadScenario(absolutePath);
9096

97+
if (scenario.htmlRoute) {
98+
server.addHtmlRoute(
99+
scenario.htmlRoute.path,
100+
scenario.htmlRoute.htmlContent,
101+
);
102+
scenario.prompt = scenario.prompt.replace(
103+
'<TEST_URL>',
104+
server.getRoute(scenario.htmlRoute.path),
105+
);
106+
}
107+
91108
// Path to the compiled MCP server
92109
const serverPath = path.join(ROOT_DIR, 'build/src/index.js');
93110
if (!fs.existsSync(serverPath)) {
@@ -148,7 +165,7 @@ async function runSingleScenario(
148165

149166
const genAI = new GoogleGenerativeAI(apiKey);
150167
const model = genAI.getGenerativeModel({
151-
model: 'gemini-3-pro-preview',
168+
model: 'gemini-2.5-flash',
152169
tools: [{functionDeclarations}],
153170
});
154171

@@ -167,7 +184,9 @@ async function runSingleScenario(
167184
console.log(`\n--- Turn 1 (User) ---`);
168185
console.log(scenario.prompt);
169186

170-
let result = await chat.sendMessage(scenario.prompt);
187+
let result = await chat.sendMessage(scenario.prompt, {
188+
timeout: 5000,
189+
});
171190
let response = result.response;
172191

173192
while (turnCount < scenario.maxTurns) {
@@ -256,13 +275,27 @@ if (!apiKey) {
256275
}
257276

258277
void describe('Gemini Eval Scenarios', () => {
278+
const server = new TestServer(TestServer.randomPort());
279+
280+
before(async () => {
281+
await server.start();
282+
});
283+
284+
after(async () => {
285+
await server.stop();
286+
});
287+
288+
afterEach(() => {
289+
server.restore();
290+
});
291+
259292
const files = fs.readdirSync(SCENARIOS_DIR).filter(file => {
260293
return file.endsWith('.ts') || file.endsWith('.js');
261294
});
262295

263296
for (const file of files) {
264-
void test(file, async () => {
265-
await runSingleScenario(path.join(SCENARIOS_DIR, file), apiKey);
297+
void test(file, {timeout: 60_000}, async () => {
298+
await runSingleScenario(path.join(SCENARIOS_DIR, file), apiKey, server);
266299
});
267300
}
268301
});
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/**
2+
* @license
3+
* Copyright 2026 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
import assert from 'node:assert';
7+
8+
import type {TestScenario} from '../eval_gemini.ts';
9+
10+
export const scenario: TestScenario = {
11+
prompt: 'Read the content of <TEST_URL>',
12+
maxTurns: 3,
13+
htmlRoute: {
14+
path: '/test.html',
15+
htmlContent: '<h1>Hello World</h1><p>This is a test.</p>',
16+
},
17+
expectations: calls => {
18+
assert.strictEqual(calls.length, 2);
19+
assert.ok(
20+
calls[0].name === 'navigate_page' || calls[0].name === 'new_page',
21+
);
22+
assert.ok(calls[1].name === 'take_snapshot');
23+
},
24+
};

tests/server.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import {before, after, afterEach} from 'node:test';
1313

1414
import {html} from './utils.js';
1515

16-
class TestServer {
16+
export class TestServer {
1717
#port: number;
1818
#server: Server;
1919

0 commit comments

Comments
 (0)