Skip to content

Commit ecdecd0

Browse files
chore: replace mock mcp client with real (mockable) mcp client
While writing test cases, I realized that writing and maintaining mocks is too much duplicated effort. So instead of having only a mocked MCP client, this commit introduces a real MCP client that talks to our MCP server and is still mockable. We now set up a real MCP client with test data in a MongoDB database that is spun up for the test suites. Mocking is still an option, but we will likely never feel the need for it.
1 parent a544560 commit ecdecd0

File tree

8 files changed

+2799
-189
lines changed

8 files changed

+2799
-189
lines changed
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import path from "path";
2+
import { v4 as uuid } from "uuid";
3+
import { fileURLToPath } from "url";
4+
import { experimental_createMCPClient as createMCPClient, tool as createVercelTool } from "ai";
5+
import { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
6+
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
7+
8+
import { ToolCall } from "./accuracy-scorers.js";
9+
10+
// Directory that contains this module. ESM provides no built-in `__dirname`,
// so it is derived from `import.meta.url`. (Previously this held the module's
// *file* path and an extra ".." segment compensated for it.)
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Build output directory: three levels above this module's directory.
const distPath = path.join(__dirname, "..", "..", "..", "dist");
// Entry script inside the build output; passed as the script argument of the
// process spawned by `AccuracyTestingClient.initializeClient`.
const cliScriptPath = path.join(distPath, "index.js");
13+
14+
/** Function that produces a tool-call result, either synchronously or as a promise. */
type ToolResultGeneratorFn = (...parameters: unknown[]) => CallToolResult | Promise<CallToolResult>;
15+
/** Result generators keyed by string; `AccuracyTestingClient` looks entries up by tool name. */
export type MockedTools = Record<string, ToolResultGeneratorFn>;
16+
17+
/**
 * Wraps an MCP client for accuracy tests.
 *
 * Every tool exposed by the underlying client is re-wrapped so that each
 * invocation is recorded and, when a mock is registered for that tool name,
 * the mock's result is returned instead of calling the real tool.
 *
 * Instances are created through {@link AccuracyTestingClient.initializeClient}.
 */
export class AccuracyTestingClient {
    /** Result generators keyed by tool name; replaced wholesale by `mockTools`. */
    private mockedTools: MockedTools = {};
    /** Tool invocations observed since the last `resetForTests`, in call order. */
    private recordedToolCalls: ToolCall[] = [];

    private constructor(private readonly client: Awaited<ReturnType<typeof createMCPClient>>) {}

    /** Closes the underlying MCP client. */
    async close() {
        await this.client.close();
    }

    /**
     * Fetches the tools from the underlying client and returns them re-wrapped
     * with recording and mocking behaviour.
     *
     * @returns A map with the same tool names as the client's tools.
     */
    async vercelTools() {
        const vercelTools = await this.client.tools();
        const rewrappedVercelTools: typeof vercelTools = {};
        for (const [toolName, tool] of Object.entries(vercelTools)) {
            rewrappedVercelTools[toolName] = createVercelTool({
                ...tool,
                execute: async (args, options) => {
                    // Record first so that mocked invocations are captured as well.
                    this.recordedToolCalls.push({
                        toolCallId: uuid(),
                        toolName: toolName,
                        parameters: args,
                    });

                    // Only own properties count as mocks; a plain index lookup would
                    // also find inherited members such as `toString` or `constructor`.
                    const hasMock = Object.prototype.hasOwnProperty.call(this.mockedTools, toolName);
                    const toolResultGeneratorFn = hasMock ? this.mockedTools[toolName] : undefined;
                    if (toolResultGeneratorFn) {
                        return await toolResultGeneratorFn(args);
                    }

                    return tool.execute(args, options);
                },
            });
        }

        return rewrappedVercelTools;
    }

    /**
     * Returns the tool calls recorded since the last reset.
     *
     * The internal array itself is returned (not a copy); `resetForTests`
     * swaps in a new array rather than clearing this one.
     */
    getToolCalls() {
        return this.recordedToolCalls;
    }

    /**
     * Replaces the current set of mocked tools.
     *
     * @param mockedTools - Result generators keyed by tool name.
     */
    mockTools(mockedTools: MockedTools) {
        this.mockedTools = mockedTools;
    }

    /** Removes all mocks and forgets all recorded tool calls. */
    resetForTests() {
        this.mockTools({});
        this.recordedToolCalls = [];
    }

    /**
     * Starts the script at `cliScriptPath` as a child process (using the current
     * Node executable) over a stdio transport and connects an MCP client to it.
     *
     * @param mdbConnectionString - Value passed to the script as `--connectionString`.
     * @returns A testing client wrapping the connected MCP client.
     */
    static async initializeClient(mdbConnectionString: string) {
        const clientTransport = new StdioClientTransport({
            command: process.execPath,
            args: [cliScriptPath, "--connectionString", mdbConnectionString],
        });

        const client = await createMCPClient({
            transport: clientTransport,
        });

        return new AccuracyTestingClient(client);
    }
}
Lines changed: 50 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
import { Tool } from "@modelcontextprotocol/sdk/types.js";
2-
import { discoverMongoDBTools, TestTools, MockedTools } from "./test-tools.js";
31
import { TestableModels } from "./models.js";
42
import { ExpectedToolCall, parameterMatchingAccuracyScorer, toolCallingAccuracyScorer } from "./accuracy-scorers.js";
53
import { Agent, getVercelToolCallingAgent } from "./agent.js";
6-
import { appendAccuracySnapshot } from "./accuracy-snapshot.js";
4+
import { prepareTestData, setupMongoDBIntegrationTest } from "../../integration/tools/mongodb/mongodbHelpers.js";
5+
import { AccuracyTestingClient, MockedTools } from "./accuracy-testing-client.js";
76

87
export interface AccuracyTestConfig {
98
systemPrompt?: string;
@@ -13,68 +12,71 @@ export interface AccuracyTestConfig {
1312
mockedTools: MockedTools;
1413
}
1514

15+
/**
 * Packages a list of accuracy test configurations under a suite name.
 *
 * @param suiteName - Key under which the configurations are stored.
 * @param testConfigs - Test configurations that belong to the suite.
 * @returns An object whose single own property `suiteName` holds `testConfigs`.
 */
export function describeSuite(suiteName: string, testConfigs: AccuracyTestConfig[]) {
    const suiteEntry: [string, AccuracyTestConfig[]] = [suiteName, testConfigs];
    // `Object.fromEntries` defines a data property, exactly like a computed key
    // in an object literal (so a "__proto__" name is handled the same way).
    return Object.fromEntries([suiteEntry]);
}
20+
1621
export function describeAccuracyTests(
17-
suiteName: string,
1822
models: TestableModels,
19-
accuracyTestConfigs: AccuracyTestConfig[]
23+
accuracyTestConfigs: {
24+
[suiteName: string]: AccuracyTestConfig[];
25+
}
2026
) {
21-
const accuracyDatetime = process.env.MDB_ACCURACY_DATETIME;
22-
const accuracyCommit = process.env.MDB_ACCURACY_COMMIT;
23-
2427
if (!models.length) {
25-
console.warn(`No models available to test ${suiteName}`);
26-
return;
28+
throw new Error("No models available to test!");
2729
}
2830

2931
const eachModel = describe.each(models);
30-
const eachTest = it.each(accuracyTestConfigs);
32+
const eachSuite = describe.each(Object.keys(accuracyTestConfigs));
33+
34+
eachModel(`$modelName`, function (model) {
35+
const mdbIntegration = setupMongoDBIntegrationTest();
36+
const populateTestData = prepareTestData(mdbIntegration);
3137

32-
eachModel(`$modelName - ${suiteName}`, function (model) {
33-
let mcpTools: Tool[];
34-
let testTools: TestTools;
38+
let testMCPClient: AccuracyTestingClient;
3539
let agent: Agent;
3640

3741
beforeAll(async () => {
38-
mcpTools = await discoverMongoDBTools();
42+
testMCPClient = await AccuracyTestingClient.initializeClient(mdbIntegration.connectionString());
43+
agent = getVercelToolCallingAgent();
3944
});
4045

41-
beforeEach(() => {
42-
testTools = new TestTools(mcpTools);
43-
agent = getVercelToolCallingAgent();
46+
beforeEach(async () => {
47+
await populateTestData();
48+
testMCPClient.resetForTests();
49+
});
50+
51+
afterAll(async () => {
52+
await testMCPClient.close();
4453
});
4554

46-
eachTest("$prompt", async function (testConfig) {
47-
testTools.mockTools(testConfig.mockedTools);
48-
const toolsForModel = testTools.vercelAiTools();
49-
const promptForModel = testConfig.injectConnectedAssumption
50-
? [testConfig.prompt, "(Assume that you are already connected to a MongoDB cluster!)"].join(" ")
51-
: testConfig.prompt;
52-
const conversation = await agent.prompt(promptForModel, model, toolsForModel);
53-
const toolCalls = testTools.getToolCalls();
54-
const toolCallingAccuracy = toolCallingAccuracyScorer(testConfig.expectedToolCalls, toolCalls);
55-
const parameterMatchingAccuracy = parameterMatchingAccuracyScorer(testConfig.expectedToolCalls, toolCalls);
56-
console.debug(`Conversation`, JSON.stringify(conversation, null, 2));
57-
console.debug(`Tool calls`, JSON.stringify(toolCalls, null, 2));
58-
console.debug(
59-
"Tool calling accuracy: %s, Parameter Accuracy: %s",
60-
toolCallingAccuracy,
61-
parameterMatchingAccuracy
62-
);
63-
if (accuracyDatetime && accuracyCommit) {
64-
await appendAccuracySnapshot({
65-
datetime: accuracyDatetime,
66-
commit: accuracyCommit,
67-
model: model.modelName,
68-
suite: suiteName,
69-
test: testConfig.prompt,
55+
eachSuite("%s", function (suiteName) {
56+
const eachTest = it.each(accuracyTestConfigs[suiteName] ?? []);
57+
58+
eachTest("$prompt", async function (testConfig) {
59+
testMCPClient.mockTools(testConfig.mockedTools);
60+
const toolsForModel = await testMCPClient.vercelTools();
61+
const promptForModel = testConfig.injectConnectedAssumption
62+
? [testConfig.prompt, "(Assume that you are already connected to a MongoDB cluster!)"].join(" ")
63+
: testConfig.prompt;
64+
const conversation = await agent.prompt(promptForModel, model, toolsForModel);
65+
const toolCalls = testMCPClient.getToolCalls();
66+
const toolCallingAccuracy = toolCallingAccuracyScorer(testConfig.expectedToolCalls, toolCalls);
67+
const parameterMatchingAccuracy = parameterMatchingAccuracyScorer(
68+
testConfig.expectedToolCalls,
69+
toolCalls
70+
);
71+
console.debug(testConfig.prompt);
72+
console.debug(`Conversation`, JSON.stringify(conversation, null, 2));
73+
// console.debug(`Tool calls`, JSON.stringify(toolCalls, null, 2));
74+
console.debug(
75+
"Tool calling accuracy: %s, Parameter Accuracy: %s",
7076
toolCallingAccuracy,
71-
parameterAccuracy: parameterMatchingAccuracy,
72-
});
73-
} else {
74-
console.info(
75-
`Skipping accuracy snapshot update for ${model.modelName} - ${suiteName} - ${testConfig.prompt}`
77+
parameterMatchingAccuracy
7678
);
77-
}
79+
});
7880
});
7981
});
8082
}

tests/accuracy/sdk/test-tools.ts

Lines changed: 0 additions & 140 deletions
This file was deleted.

0 commit comments

Comments
 (0)