@@ -10,6 +10,7 @@ import { availableModels } from "./models/index.js";
10
10
import { ToolDefinition } from "./models/model.js" ;
11
11
import { zodToJsonSchema } from "zod-to-json-schema" ;
12
12
13
/**
 * Value a `ToolMock` is configured to return through `thenReturn`:
 * a list of content parts, each carrying a `type` tag and its `text`.
 */
type ToolMockReturn = {
    content: { type: string; text: string }[];
};
13
14
class ToolMock {
14
15
readonly name : string ;
15
16
arguments : unknown ;
@@ -27,7 +28,7 @@ class ToolMock {
27
28
return this ;
28
29
}
29
30
30
- thenReturn ( value : unknown ) : this {
31
+ thenReturn ( value : ToolMockReturn ) : this {
31
32
this . returns = value ;
32
33
return this ;
33
34
}
@@ -55,6 +56,36 @@ type AccuracyTestCaseFn = (tools: AccuracyToolSetupFunction) => void;
55
56
type AccuracyItFn = ( prompt : string , testCase : AccuracyTestCaseFn ) => void ;
56
57
type AccuracyTestSuite = { prompt : AccuracyItFn } ;
57
58
59
/**
 * Record of a tool call requested by the model for a tool that the test case
 * did not mock: the tool's name and the arguments it was called with.
 */
type NonMockedCallError = {
    tool: string;
    args: unknown;
};
60
+
61
/**
 * Forwards its arguments to `console.log`, but only when the
 * `MONGODB_MCP_TEST_VERBOSE` environment variable is exactly `"true"`.
 * With any other value (or when the variable is unset) nothing is printed.
 *
 * @param args - Values passed through to `console.log` unchanged.
 */
function logVerbose(...args: unknown[]): void {
    const verboseEnabled = process.env.MONGODB_MCP_TEST_VERBOSE === "true";
    if (!verboseEnabled) {
        return;
    }

    console.log(...args);
}
66
+
67
/**
 * Logs the plan generated for `model` through `logVerbose`, so nothing is
 * printed unless verbose test output is enabled.
 *
 * @param model - Label printed in front of the plan.
 * @param plan - Plan steps; joined with newlines before logging.
 */
function printModelPlanIfVerbose(model: string, plan: string[]): void {
    const planText = plan.join("\n");
    logVerbose(model, "📝: ", planText);
}
70
+
71
/**
 * Logs the prompt under test for `model` through `logVerbose`, so nothing is
 * printed unless verbose test output is enabled.
 *
 * @param model - Label printed in front of the prompt.
 * @param prompt - Prompt text to log.
 */
function testPromptIsVerbose(model: string, prompt: string): void {
    const marker = "📜: ";
    logVerbose(model, marker, prompt);
}
74
+
75
/**
 * Logs the text a model answered with through `logVerbose`.
 * An empty response is skipped entirely.
 *
 * @param model - Label printed in front of the response.
 * @param response - Text produced by the model.
 */
function modelSaidVerbose(model: string, response: string): void {
    if (response.length === 0) {
        return;
    }

    logVerbose(model, "🗣️: ", response);
}
80
+
81
/**
 * Logs a tool call requested by a model through `logVerbose`: the tool name
 * followed by its arguments serialized as JSON.
 *
 * @param model - Label printed in front of the tool call.
 * @param toolCall - Name of the tool being called.
 * @param args - Arguments supplied for the call; serialized with `JSON.stringify`.
 */
function modelToolCalledVerbose(model: string, toolCall: string, args: unknown): void {
    let serializedArgs: string;
    try {
        serializedArgs = JSON.stringify(args);
    } catch {
        // JSON.stringify throws on circular structures and BigInt values. This helper
        // is diagnostics-only, so log a placeholder instead of letting the error escape.
        serializedArgs = "<unserializable arguments>";
    }

    logVerbose(model, "🛠️: ", toolCall, serializedArgs);
}
84
+
85
/**
 * Logs the text gathered after tool calls returned through `logVerbose`,
 * so nothing is printed unless verbose test output is enabled.
 *
 * @param model - Label printed in front of the answer.
 * @param answer - Text to log.
 */
function toolCallsReturnedVerbose(model: string, answer: string): void {
    const marker = "📋: ";
    logVerbose(model, marker, answer);
}
88
+
58
89
export function describeAccuracyTest ( useCase : string , testCaseFn : ( testSuite : AccuracyTestSuite ) => void ) {
59
90
const models = availableModels ( ) ;
60
91
if ( models . length === 0 ) {
@@ -105,8 +136,13 @@ export function describeAccuracyTest(useCase: string, testCaseFn: (testSuite: Ac
105
136
106
137
const promptFn : AccuracyItFn = ( prompt : string , testCase : AccuracyTestCaseFn ) => {
107
138
it ( prompt , async ( ) => {
139
+ testPromptIsVerbose ( model . name , prompt ) ;
140
+
108
141
const mcpServerUnsafe = ( mcpServer as unknown as McpServerUnsafe ) . mcpServer ;
109
142
const tools = mcpServerUnsafe [ "_registeredTools" ] as { [ toolName : string ] : RegisteredTool } ;
143
+ const mockedTools = new Set < string > ( ) ;
144
+ const nonMockedCallErrors = new Array < NonMockedCallError > ( ) ;
145
+
110
146
const toolDefinitions = Object . entries ( tools ) . map ( ( [ toolName , tool ] ) => {
111
147
if ( ! tool . inputSchema ) {
112
148
throw new Error ( `Tool ${ toolName } does not have an input schema defined.` ) ;
@@ -136,17 +172,22 @@ export function describeAccuracyTest(useCase: string, testCaseFn: (testSuite: Ac
136
172
return toolForApi ;
137
173
} ) ;
138
174
139
- const mocks : Array < ToolMock > = [ ] ;
175
+ const plan = await model . generatePlan ( prompt , toolDefinitions ) ;
176
+ printModelPlanIfVerbose ( model . name , plan ) ;
177
+
178
+
179
+ const mocks : Array < ToolMock > = [ ] ;
140
180
const toolFn : AccuracyToolSetupFunction = ( toolName : string ) => {
141
181
const mock = new ToolMock ( toolName ) ;
182
+ mockedTools . add ( toolName ) ;
142
183
143
184
const mcpServerUnsafe = ( mcpServer as unknown as McpServerUnsafe ) . mcpServer ;
144
185
const tools = mcpServerUnsafe [ "_registeredTools" ] as { [ toolName : string ] : RegisteredTool } ;
145
186
146
187
if ( tools [ toolName ] !== undefined ) {
147
188
tools [ toolName ] . callback = ( ( args : unknown ) => {
148
189
mock . _wasCalledWith ( args ) ;
149
- return mock . returns ;
190
+ return Promise . resolve ( mock . returns ) ;
150
191
} ) as unknown as ToolCallback ;
151
192
}
152
193
@@ -157,30 +198,55 @@ export function describeAccuracyTest(useCase: string, testCaseFn: (testSuite: Ac
157
198
testCase ( toolFn ) ;
158
199
159
200
const consumePromptUntilNoMoreCall = async ( prompt : string [ ] ) => {
160
- const promptStr = prompt . join ( "\n" ) ;
161
- const response = await model . generateContent ( promptStr , toolDefinitions ) ;
201
+ const response = await model . generateContent ( prompt , toolDefinitions ) ;
162
202
203
+ modelSaidVerbose ( model . name , response . text || "<no text>" ) ;
163
204
if ( response . toolCall . length > 0 ) {
164
205
const toolCallResults = await Promise . all (
165
- response . toolCall . map ( ( tc ) =>
166
- mcpClient . callTool ( {
206
+ response . toolCall . map ( ( tc ) => {
207
+ modelToolCalledVerbose ( model . name , tc . name , tc . args ) ;
208
+
209
+ if ( ! mockedTools . has ( tc . name ) ) {
210
+ nonMockedCallErrors . push ( { tool : tc . name , args : tc . args } ) ;
211
+ }
212
+
213
+ return mcpClient . callTool ( {
167
214
name : tc . name ,
168
215
arguments : tc . args ,
169
- } )
170
- )
216
+ } ) ;
217
+ } )
171
218
) ;
172
- const newPrompt = toolCallResults . flatMap ( ( result ) =>
219
+
220
+ const responseParts = toolCallResults . flatMap ( ( result ) =>
173
221
( result . content as Array < { text : string } > ) . map ( ( c ) => c . text )
174
222
) ;
175
223
176
- if ( newPrompt . join ( "\n" ) . trim ( ) . length > 0 ) {
224
+ const newPrompt = prompt . concat ( responseParts ) ;
225
+ toolCallsReturnedVerbose ( model . name , newPrompt . join ( "\n" ) ) ;
226
+
227
+ if ( responseParts . length > 0 ) {
177
228
return consumePromptUntilNoMoreCall ( newPrompt ) ;
178
229
}
179
230
}
180
231
} ;
181
232
233
+ for ( const step of plan ) {
234
+ await consumePromptUntilNoMoreCall ( [ step ] ) ;
235
+ }
236
+
182
237
await consumePromptUntilNoMoreCall ( [ prompt ] ) ;
238
+
183
239
mocks . forEach ( ( mock ) => mock . _verify ( ) ) ;
240
+ if ( nonMockedCallErrors . length > 0 ) {
241
+ for ( const call of nonMockedCallErrors ) {
242
+ console . error (
243
+ `Non-mocked tool call detected: ${ call . tool } with args:` ,
244
+ JSON . stringify ( call . args , null , 2 )
245
+ ) ;
246
+ }
247
+
248
+ throw new Error ( "Non-mocked tool calls detected. Check the console for details." ) ;
249
+ }
184
250
} ) ;
185
251
} ;
186
252
0 commit comments