 
 import fs from 'node:fs';
 import path from 'node:path';
-import {describe, test} from 'node:test';
+import {describe, test, before, after, afterEach} from 'node:test';
 
 import {
   GoogleGenerativeAI,
@@ -18,6 +18,7 @@ import {StdioClientTransport} from '@modelcontextprotocol/sdk/client/stdio.js';
 
 const ROOT_DIR = path.resolve(import.meta.dirname, '..');
 const SCENARIOS_DIR = path.join(import.meta.dirname, 'eval_scenarios');
+import {TestServer} from '../build/tests/server.js';
 
 // Define schema for our test scenarios
 export interface CapturedFunctionCall {
@@ -29,6 +30,10 @@ export interface TestScenario {
   prompt: string;
   maxTurns: number;
   expectations: (calls: CapturedFunctionCall[]) => void;
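+  // Optional HTML fixture for this scenario; it is served by the local
+  // test server, and the prompt's <TEST_URL> placeholder is rewritten to
+  // the route that serves it.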
+  htmlRoute?: {
+    path: string;
+    htmlContent: string;
+  };
 }
 
 async function loadScenario(scenarioPath: string): Promise<TestScenario> {
@@ -78,6 +83,7 @@ const cleanSchemaRecursive = (schema: unknown): unknown => {
 async function runSingleScenario(
   scenarioPath: string,
   apiKey: string,
+  server: TestServer,
 ): Promise<void> {
   const absolutePath = path.resolve(scenarioPath);
   console.log(`\n### Running Scenario: ${absolutePath} ###`);
@@ -88,6 +94,17 @@ async function runSingleScenario(
   try {
     const scenario = await loadScenario(absolutePath);
 
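+    // Register the scenario's HTML fixture with the test server and point
+    // the prompt's <TEST_URL> placeholder at the served route.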
+    if (scenario.htmlRoute) {
+      server.addHtmlRoute(
+        scenario.htmlRoute.path,
+        scenario.htmlRoute.htmlContent,
+      );
+      scenario.prompt = scenario.prompt.replace(
+        '<TEST_URL>',
+        server.getRoute(scenario.htmlRoute.path),
+      );
+    }
+
     // Path to the compiled MCP server
     const serverPath = path.join(ROOT_DIR, 'build/src/index.js');
     if (!fs.existsSync(serverPath)) {
@@ -148,7 +165,7 @@ async function runSingleScenario(
 
     const genAI = new GoogleGenerativeAI(apiKey);
     const model = genAI.getGenerativeModel({
-      model: 'gemini-3-pro-preview',
+      model: 'gemini-2.5-flash',
       tools: [{functionDeclarations}],
     });
 
@@ -167,7 +184,9 @@ async function runSingleScenario(
     console.log(`\n--- Turn 1 (User) ---`);
     console.log(scenario.prompt);
 
-    let result = await chat.sendMessage(scenario.prompt);
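+    // Per-request timeout in milliseconds; the SDK aborts the call if no
+    // response arrives within 5 s.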
+    let result = await chat.sendMessage(scenario.prompt, {
+      timeout: 5000,
+    });
     let response = result.response;
 
     while (turnCount < scenario.maxTurns) {
@@ -256,13 +275,27 @@ if (!apiKey) {
 }
 
 void describe('Gemini Eval Scenarios', () => {
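+  // One fixture server shared by all scenarios; randomPort() is assumed
+  // to pick an unused port for it.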
+  const server = new TestServer(TestServer.randomPort());
+
+  before(async () => {
+    await server.start();
+  });
+
+  after(async () => {
+    await server.stop();
+  });
+
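+  // restore() is expected to drop the routes a scenario registered, so
+  // each test starts against a clean server.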
+  afterEach(() => {
+    server.restore();
+  });
+
   const files = fs.readdirSync(SCENARIOS_DIR).filter(file => {
     return file.endsWith('.ts') || file.endsWith('.js');
   });
 
   for (const file of files) {
-    void test(file, async () => {
-      await runSingleScenario(path.join(SCENARIOS_DIR, file), apiKey);
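+    // Give each scenario up to 60 s before node:test aborts it.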
+    void test(file, {timeout: 60_000}, async () => {
+      await runSingleScenario(path.join(SCENARIOS_DIR, file), apiKey, server);
+    });
   }
 });
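
For illustration, a scenario using the new htmlRoute support might look like the sketch below. The file name, import path, and default-export convention are assumptions (loadScenario's loading mechanism isn't shown in this diff), and TestScenario may have required fields beyond those visible in the hunk above; only prompt, maxTurns, expectations, and htmlRoute come from the diff.

// eval_scenarios/example_click.ts — hypothetical scenario module
import type {CapturedFunctionCall, TestScenario} from '../eval.js';

const scenario: TestScenario = {
  // The runner replaces <TEST_URL> with the route served for htmlRoute.
  prompt: 'Open <TEST_URL> and click the button labelled "Go".',
  maxTurns: 5,
  htmlRoute: {
    path: '/click-test',
    htmlContent: '<!DOCTYPE html><button>Go</button>',
  },
  expectations: (calls: CapturedFunctionCall[]) => {
    if (calls.length === 0) {
      throw new Error('expected at least one captured tool call');
    }
  },
};

export default scenario;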