1
- import { Tool } from "@modelcontextprotocol/sdk/types.js" ;
2
- import { discoverMongoDBTools , TestTools , MockedTools } from "./test-tools.js" ;
3
1
import { TestableModels } from "./models.js" ;
4
2
import { ExpectedToolCall , parameterMatchingAccuracyScorer , toolCallingAccuracyScorer } from "./accuracy-scorers.js" ;
5
3
import { Agent , getVercelToolCallingAgent } from "./agent.js" ;
6
- import { appendAccuracySnapshot } from "./accuracy-snapshot.js" ;
4
+ import { prepareTestData , setupMongoDBIntegrationTest } from "../../integration/tools/mongodb/mongodbHelpers.js" ;
5
+ import { AccuracyTestingClient , MockedTools } from "./accuracy-testing-client.js" ;
7
6
8
7
export interface AccuracyTestConfig {
9
8
systemPrompt ?: string ;
@@ -13,68 +12,71 @@ export interface AccuracyTestConfig {
13
12
mockedTools : MockedTools ;
14
13
}
15
14
15
/**
 * Groups a list of accuracy test configurations under a suite name.
 *
 * The result is shaped so it can be spread into, or passed directly as, the
 * suite-name-to-configs map accepted by `describeAccuracyTests`.
 *
 * @param suiteName - Property name under which the configurations are stored.
 * @param testConfigs - Test configurations that belong to the suite.
 * @returns An object with a single property named `suiteName` whose value is
 * the same `testConfigs` array (not a copy).
 */
export function describeSuite(
    suiteName: string,
    testConfigs: AccuracyTestConfig[]
): Record<string, AccuracyTestConfig[]> {
    return {
        [suiteName]: testConfigs,
    };
}
20
+
16
21
/**
 * Registers accuracy tests for every model / suite / test-config combination.
 *
 * For each model a `describe` block is created that:
 *  - sets up the MongoDB integration helpers and a test-data populator,
 *  - initializes an `AccuracyTestingClient` against the integration connection
 *    string and creates a tool-calling agent once (`beforeAll`),
 *  - repopulates test data and resets the client before each test,
 *  - closes the client when the model's block finishes.
 *
 * Inside each model block, one nested `describe` is created per suite name and
 * one test per config. A test mocks the configured tools, prompts the agent,
 * scores the recorded tool calls against `expectedToolCalls`, and logs the
 * conversation and both scores via `console.debug`. The scores are only
 * logged; this function makes no assertions on them.
 *
 * @param models - Models to run every suite against.
 * @param accuracyTestConfigs - Map from suite name to that suite's test configs.
 * @throws Error when `models` is empty.
 */
export function describeAccuracyTests(
    models: TestableModels,
    accuracyTestConfigs: {
        [suiteName: string]: AccuracyTestConfig[];
    }
): void {
    if (!models.length) {
        throw new Error("No models available to test!");
    }

    const eachModel = describe.each(models);
    const eachSuite = describe.each(Object.keys(accuracyTestConfigs));

    eachModel(`$modelName`, function (model) {
        const mdbIntegration = setupMongoDBIntegrationTest();
        const populateTestData = prepareTestData(mdbIntegration);

        let testMCPClient: AccuracyTestingClient;
        let agent: Agent;

        beforeAll(async () => {
            testMCPClient = await AccuracyTestingClient.initializeClient(mdbIntegration.connectionString());
            agent = getVercelToolCallingAgent();
        });

        beforeEach(async () => {
            await populateTestData();
            testMCPClient.resetForTests();
        });

        afterAll(async () => {
            // If client initialization failed in beforeAll, the variable was
            // never assigned. Guard the call so teardown does not throw a
            // second TypeError that hides the original failure.
            await testMCPClient?.close();
        });

        eachSuite("%s", function (suiteName) {
            // Fall back to an empty list so an absent key cannot yield undefined.
            const eachTest = it.each(accuracyTestConfigs[suiteName] ?? []);

            eachTest("$prompt", async function (testConfig) {
                testMCPClient.mockTools(testConfig.mockedTools);
                const toolsForModel = await testMCPClient.vercelTools();
                // Optionally tell the model it is already connected to a cluster.
                const promptForModel = testConfig.injectConnectedAssumption
                    ? [testConfig.prompt, "(Assume that you are already connected to a MongoDB cluster!)"].join(" ")
                    : testConfig.prompt;
                const conversation = await agent.prompt(promptForModel, model, toolsForModel);
                const toolCalls = testMCPClient.getToolCalls();
                const toolCallingAccuracy = toolCallingAccuracyScorer(testConfig.expectedToolCalls, toolCalls);
                const parameterMatchingAccuracy = parameterMatchingAccuracyScorer(
                    testConfig.expectedToolCalls,
                    toolCalls
                );
                console.debug(testConfig.prompt);
                console.debug(`Conversation`, JSON.stringify(conversation, null, 2));
                // console.debug(`Tool calls`, JSON.stringify(toolCalls, null, 2));
                console.debug(
                    "Tool calling accuracy: %s, Parameter Accuracy: %s",
                    toolCallingAccuracy,
                    parameterMatchingAccuracy
                );
            });
        });
    });
}
0 commit comments