@@ -45,10 +45,11 @@ export function describeAccuracyTests(
45
45
46
46
eachTest ( "$prompt" , async function ( testConfig ) {
47
47
testTools . mockTools ( testConfig . mockedTools ) ;
48
+ const toolsForModel = testTools . vercelAiTools ( ) ;
48
49
const promptForModel = testConfig . injectConnectedAssumption
49
50
? [ testConfig . prompt , "(Assume that you are already connected to a MongoDB cluster!)" ] . join ( " " )
50
51
: testConfig . prompt ;
51
- const conversation = await agent . prompt ( promptForModel , model , testTools . vercelAiTools ( ) ) ;
52
+ const conversation = await agent . prompt ( promptForModel , model , toolsForModel ) ;
52
53
const toolCalls = testTools . getToolCalls ( ) ;
53
54
const toolCallingAccuracy = toolCallingAccuracyScorer ( testConfig . expectedToolCalls , toolCalls ) ;
54
55
const parameterMatchingAccuracy = parameterMatchingAccuracyScorer ( testConfig . expectedToolCalls , toolCalls ) ;
@@ -73,6 +74,7 @@ export function describeAccuracyTests(
73
74
expect ( parameterMatchingAccuracy ) . toBeGreaterThanOrEqual ( 0.5 ) ;
74
75
} catch ( error ) {
75
76
console . warn ( `Accuracy test failed for ${ model . modelName } - ${ suiteName } - ${ testConfig . prompt } ` ) ;
77
+ console . debug ( `Provided tools` , JSON . stringify ( toolsForModel , null , 2 ) ) ;
76
78
console . debug ( `Conversation` , JSON . stringify ( conversation , null , 2 ) ) ;
77
79
console . debug ( `Tool calls` , JSON . stringify ( toolCalls , null , 2 ) ) ;
78
80
console . debug ( `Tool calling accuracy` , toolCallingAccuracy ) ;
0 commit comments