Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
"valibot": "1.2.0"
},
"devDependencies": {
"@ai-sdk/mcp": "^0.0.12",
"@ai-sdk/mcp": "^1.0.1",
"@ai-sdk/openai": "3.0.0-beta.101",
"@iconify-json/vscode-icons": "^1.2.37",
"@nuxt/eslint": "^1.12.1",
Expand Down
56 changes: 28 additions & 28 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 28 additions & 10 deletions test/mcp.eval.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import { experimental_createMCPClient as createMCPClient } from '@ai-sdk/mcp'
import { createMCPClient } from '@ai-sdk/mcp'
import { generateText } from 'ai'
import { evalite } from 'evalite'
import { toolCallAccuracy } from 'evalite/scorers'
import type { Evalite } from 'evalite'

type ToolCall = Evalite.Scorers.ToolCall

/**
* MCP Evaluation Tests
Expand Down Expand Up @@ -43,14 +46,15 @@ evalite('Evaluate Nuxt MCP Documentation Tools', {
const result = await generateText({
model,
prompt: input,
// @ts-expect-error - MCP tools type mismatch with ai SDK ToolSet
tools: await mcpClient.tools()
})
return result.toolCalls ?? []
} finally {
await mcpClient.close()
}
},
scorers: [async ({ output, expected }) => toolCallAccuracy({ actualCalls: output, expectedCalls: expected })]
scorers: [async ({ output, expected }) => toolCallAccuracy({ actualCalls: output as ToolCall[], expectedCalls: expected as ToolCall[] })]
})

evalite('Evaluate Nuxt MCP Blog Tools', {
Expand All @@ -66,14 +70,15 @@ evalite('Evaluate Nuxt MCP Blog Tools', {
const result = await generateText({
model,
prompt: input,
// @ts-expect-error - MCP tools type mismatch with ai SDK ToolSet
tools: await mcpClient.tools()
})
return result.toolCalls ?? []
} finally {
await mcpClient.close()
}
},
scorers: [async ({ output, expected }) => toolCallAccuracy({ actualCalls: output, expectedCalls: expected })]
scorers: [async ({ output, expected }) => toolCallAccuracy({ actualCalls: output as ToolCall[], expectedCalls: expected as ToolCall[] })]
})

evalite('Evaluate Nuxt MCP Deploy Tools', {
Expand All @@ -90,17 +95,18 @@ evalite('Evaluate Nuxt MCP Deploy Tools', {
const result = await generateText({
model,
prompt: input,
// @ts-expect-error - MCP tools type mismatch with ai SDK ToolSet
tools: await mcpClient.tools()
})
return result.toolCalls ?? []
} finally {
await mcpClient.close()
}
},
scorers: [async ({ output, expected }) => toolCallAccuracy({ actualCalls: output, expectedCalls: expected })]
scorers: [async ({ output, expected }) => toolCallAccuracy({ actualCalls: output as ToolCall[], expectedCalls: expected as ToolCall[] })]
})

evalite('Evaluate Nuxt MCP Module Tools', {
evalite<string, ToolCall[], ToolCall[]>('Evaluate Nuxt MCP Module Tools', {
data: async () => [
{ input: 'I need to add authentication with social login providers to my app. Find me a suitable module.', expected: [{ toolName: 'list_modules', input: { category: 'authentication' } }] },
{ input: 'What modules are available for image optimization and lazy loading?', expected: [{ toolName: 'list_modules', input: { category: 'media' } }] },
Expand All @@ -111,25 +117,37 @@ evalite('Evaluate Nuxt MCP Module Tools', {
task: async (input) => {
const mcpClient = await createMCPClient({ transport: { type: 'http', url: MCP_URL } })
try {
const result = await generateText({ model, prompt: input, tools: await mcpClient.tools(), maxSteps: 3 })
return result.toolCalls ?? []
const result = await generateText({
model,
prompt: input,
// @ts-expect-error - MCP tools type mismatch with ai SDK ToolSet
tools: await mcpClient.tools(),
maxSteps: 3
})
return result.toolCalls as ToolCall[] ?? []
} finally {
await mcpClient.close()
}
},
scorers: [async ({ output, expected }) => toolCallAccuracy({ actualCalls: output, expectedCalls: expected })]
})

evalite('Evaluate Nuxt MCP Cross-Tool Workflows', {
evalite<string, ToolCall[], ToolCall[]>('Evaluate Nuxt MCP Cross-Tool Workflows', {
data: async () => [
{ input: 'I want to build an e-commerce site with Nuxt 4. What modules do I need and where should I deploy it?', expected: [{ toolName: 'list_modules' }, { toolName: 'list_deploy_providers' }] },
{ input: 'Show me the latest features in Nuxt 4 and link to the relevant documentation', expected: [{ toolName: 'list_blog_posts' }, { toolName: 'get_documentation_page', input: { path: '/docs/4.x/getting-started/introduction' } }] }
],
task: async (input) => {
const mcpClient = await createMCPClient({ transport: { type: 'http', url: MCP_URL } })
try {
const result = await generateText({ model, prompt: input, tools: await mcpClient.tools(), maxSteps: 5 })
return result.toolCalls ?? []
const result = await generateText({
model,
prompt: input,
// @ts-expect-error - MCP tools type mismatch with ai SDK ToolSet
tools: await mcpClient.tools(),
maxSteps: 5
})
return result.toolCalls as ToolCall[] ?? []
} finally {
await mcpClient.close()
}
Expand Down
17 changes: 17 additions & 0 deletions test/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// TypeScript project configuration for the eval files matched by "include" below.
// It is type-check only ("noEmit"): no JavaScript output is produced from this project.
{
"compilerOptions": {
// Use Node.js-style module semantics and resolution for both the module
// format and the import lookup algorithm.
"module": "NodeNext",
"moduleResolution": "NodeNext",
// Target the latest ECMAScript version TypeScript supports.
"target": "ESNext",
// Turn on the full family of strict type-checking options.
"strict": true,
// Allow default-import syntax for CommonJS modules.
"esModuleInterop": true,
// Do not type-check declaration (.d.ts) files, including those shipped by dependencies.
"skipLibCheck": true,
// Permit importing .json files as modules.
"resolveJsonModule": true,
// Require every file to be safely transpilable on its own, without
// cross-file type information.
"isolatedModules": true,
// Imports/exports without a `type` modifier are preserved as written, so
// type-only symbols must be imported with `import type`.
"verbatimModuleSyntax": true,
// Type-check only; never write output files.
"noEmit": true
},
// Pattern is resolved relative to the directory containing this file;
// `*` does not cross directory separators, so only eval files directly in
// this directory are included.
"include": [
"*.eval.ts"
]
}