improve MCP tool descriptions and add evals

onmax · onmax · commit 98af43fdfdaa · 2025-11-18T08:40:50.000-06:00
Enhanced tool descriptions with WHEN TO USE guidance, examples, and common paths. Added realistic evaluation scenarios.
diff --git a/.env.example b/.env.example
@@ -26,3 +26,7 @@ NUXT_OAUTH_GITHUB_CLIENT_SECRET=
 # Session encryption password (generate a secure random string)
 # You can use: openssl rand -base64 32
 NUXT_SESSION_PASSWORD=
+
+# MCP Evaluation (Optional - for running evalite tests)
+OPENAI_API_KEY=
+MCP_URL=
diff --git a/evalite.config.ts b/evalite.config.ts
@@ -0,0 +1,7 @@
+import { defineConfig } from 'evalite/config'
+
+// Evalite runner settings used by the MCP evals.
+const evalConfig = defineConfig({
+  // Cache AI SDK outputs
+  cache: true,
+  // Limit concurrent API calls
+  maxConcurrency: 5,
+  // 60s for MCP calls
+  testTimeout: 60_000,
+})
+
+export default evalConfig
diff --git a/mcp.eval.ts b/mcp.eval.ts
@@ -0,0 +1,134 @@
+import { experimental_createMCPClient as createMCPClient } from '@ai-sdk/mcp'
+import { openai } from '@ai-sdk/openai'
+import { generateText } from 'ai'
+import { evalite } from 'evalite'
+import { wrapAISDKModel } from 'evalite/ai-sdk'
+import { toolCallAccuracy } from 'evalite/scorers'
+
+/**
+ * MCP Evaluation Tests
+ *
+ * Note: The MCP server has prompts (find_documentation_for_topic, deployment_guide, migration_help)
+ * that would improve these scenarios, but @ai-sdk/mcp doesn't support converting prompts to tools yet.
+ *
+ * TODO: Once @ai-sdk/mcp supports prompt-to-tool conversion or prompt usage in generateText,
+ * uncomment the tests below that require search/topic-based navigation.
+ *
+ * Related: https://ai-sdk.dev/docs/reference/ai-sdk-core/create-mcp-client
+ */
+
+// Endpoint of the MCP server under evaluation. `||` (not `??`) is deliberate:
+// an empty `MCP_URL=` entry, as shipped in .env.example, yields '' rather than
+// undefined, and must still fall back to the localhost default.
+const MCP_URL = process.env.MCP_URL || 'http://localhost:3000/mcp'
+// Model shared by every eval in this file, passed through evalite's AI SDK wrapper.
+const model = wrapAISDKModel(openai('gpt-5.1-codex-mini'))
+
+// Documentation tool evals: each case pairs a user prompt with the tool call(s)
+// the model is expected to issue against the MCP server.
+evalite('Evaluate Nuxt MCP Documentation Tools', {
+  data: async () => [
+    // TODO: Uncomment when find_documentation_for_topic prompt becomes usable as a tool
+    // {
+    //   input: 'I keep getting hydration mismatch errors in my Nuxt app. Find the documentation that explains this.',
+    //   expected: [{ toolName: 'find_documentation_for_topic', input: { topic: 'hydration errors' } }],
+    // },
+    // {
+    //   input: 'What are the different rendering modes available in Nuxt 4 and which one should I use for SEO?',
+    //   expected: [{ toolName: 'find_documentation_for_topic', input: { topic: 'rendering modes SSR SEO' } }],
+    // },
+    // {
+    //   input: 'How do I migrate my composables from Nuxt 3 to Nuxt 4?',
+    //   expected: [{ toolName: 'migration_help', input: { fromVersion: '3.x', toVersion: '4.x' } }],
+    // },
+    {
+      input: 'Show me the introduction page for Nuxt 4',
+      expected: [
+        {
+          toolName: 'get_documentation_page',
+          input: { path: '/docs/4.x/getting-started/introduction' },
+        },
+      ],
+    },
+  ],
+  task: async (input) => {
+    // A fresh MCP client per case; it is always closed, even when generation throws.
+    const client = await createMCPClient({
+      transport: { type: 'http', url: MCP_URL },
+    })
+    try {
+      const tools = await client.tools()
+      const { toolCalls } = await generateText({ model, prompt: input, tools })
+      return toolCalls ?? []
+    }
+    finally {
+      await client.close()
+    }
+  },
+  scorers: [
+    async ({ output, expected }) => {
+      return toolCallAccuracy({ actualCalls: output, expectedCalls: expected })
+    },
+  ],
+})
+
+// Blog tool evals: listing questions should hit `list_blog_posts`, while a
+// request for a specific post should hit `get_blog_post` with its path.
+evalite('Evaluate Nuxt MCP Blog Tools', {
+  data: async () => [
+    {
+      input: 'What are the latest performance improvements announced for Nuxt?',
+      expected: [{ toolName: 'list_blog_posts' }],
+    },
+    {
+      input: 'Show me announcements about major version releases',
+      expected: [{ toolName: 'list_blog_posts' }],
+    },
+    {
+      input: 'Has there been any blog post about server components or islands architecture?',
+      expected: [{ toolName: 'list_blog_posts' }],
+    },
+    {
+      input: 'Get the blog post about Nuxt 4',
+      expected: [{ toolName: 'get_blog_post', input: { path: '/blog/v4' } }],
+    },
+  ],
+  task: async (input) => {
+    // A fresh MCP client per case; it is always closed, even when generation throws.
+    const client = await createMCPClient({
+      transport: { type: 'http', url: MCP_URL },
+    })
+    try {
+      const tools = await client.tools()
+      const { toolCalls } = await generateText({ model, prompt: input, tools })
+      return toolCalls ?? []
+    }
+    finally {
+      await client.close()
+    }
+  },
+  scorers: [
+    async ({ output, expected }) => {
+      return toolCallAccuracy({ actualCalls: output, expectedCalls: expected })
+    },
+  ],
+})
+
+// Deploy tool evals: every active case expects a `list_deploy_providers` call.
+evalite('Evaluate Nuxt MCP Deploy Tools', {
+  data: async () => [
+    {
+      input: 'I need a deployment platform that supports edge functions and has a generous free tier. What are my options?',
+      expected: [{ toolName: 'list_deploy_providers' }],
+    },
+    {
+      input: 'What deployment providers support Docker containerization?',
+      expected: [{ toolName: 'list_deploy_providers' }],
+    },
+    {
+      input: 'I want to self-host my Nuxt app with automatic SSL. Show me how to deploy to a VPS.',
+      expected: [{ toolName: 'list_deploy_providers' }],
+    },
+    // TODO: Uncomment when deployment_guide prompt becomes usable as a tool
+    // { input: 'How do I deploy to Vercel?', expected: [{ toolName: 'deployment_guide', input: { provider: 'Vercel' } }] },
+  ],
+  task: async (input) => {
+    // A fresh MCP client per case; it is always closed, even when generation throws.
+    const client = await createMCPClient({
+      transport: { type: 'http', url: MCP_URL },
+    })
+    try {
+      const tools = await client.tools()
+      const { toolCalls } = await generateText({ model, prompt: input, tools })
+      return toolCalls ?? []
+    }
+    finally {
+      await client.close()
+    }
+  },
+  scorers: [
+    async ({ output, expected }) => {
+      return toolCallAccuracy({ actualCalls: output, expectedCalls: expected })
+    },
+  ],
+})
+
+/**
+ * Module tool evals: discovery through `list_modules` and lookup through `get_module`.
+ *
+ * One case expects `list_modules` followed by `get_module`, so the task lets the
+ * model run up to 3 steps and reports the tool calls made across all of them.
+ */
+evalite('Evaluate Nuxt MCP Module Tools', {
+  data: async () => [
+    { input: 'I need to add authentication with social login providers to my app. Find me a suitable module.', expected: [{ toolName: 'list_modules', input: { category: 'authentication' } }] },
+    { input: 'What modules are available for image optimization and lazy loading?', expected: [{ toolName: 'list_modules', input: { category: 'media' } }] },
+    { input: 'Show me popular UI component libraries for Nuxt 4', expected: [{ toolName: 'list_modules', input: { category: 'ui' } }] },
+    { input: 'I want to add i18n support for multiple languages. What module should I use and does it support Nuxt 4?', expected: [{ toolName: 'list_modules' }, { toolName: 'get_module', input: { slug: '@nuxtjs/i18n' } }] },
+    { input: 'Get details about @nuxt/ui module', expected: [{ toolName: 'get_module', input: { slug: '@nuxt/ui' } }] },
+  ],
+  task: async (input) => {
+    const mcpClient = await createMCPClient({ transport: { type: 'http', url: MCP_URL } })
+    try {
+      const result = await generateText({
+        model,
+        prompt: input,
+        tools: await mcpClient.tools(),
+        // The pinned AI SDK (6.x beta) has no `maxSteps` option; the step budget
+        // is expressed through `stopWhen`, which otherwise stops after one step.
+        stopWhen: stepCountIs(3),
+      })
+      // `result.toolCalls` only covers the final step, so collect the calls of every step.
+      return result.steps.flatMap(step => step.toolCalls)
+    }
+    finally {
+      // Always release the MCP connection, even when generation throws.
+      await mcpClient.close()
+    }
+  },
+  scorers: [async ({ output, expected }) => toolCallAccuracy({ actualCalls: output, expectedCalls: expected })],
+})
+
+/**
+ * Cross-tool workflow evals: each prompt is expected to trigger calls to more
+ * than one MCP tool, so the task lets the model run up to 5 steps and reports
+ * the tool calls made across all of them.
+ */
+evalite('Evaluate Nuxt MCP Cross-Tool Workflows', {
+  data: async () => [
+    { input: 'I want to build an e-commerce site with Nuxt 4. What modules do I need and where should I deploy it?', expected: [{ toolName: 'list_modules' }, { toolName: 'list_deploy_providers' }] },
+    { input: 'Show me the latest features in Nuxt 4 and link to the relevant documentation', expected: [{ toolName: 'list_blog_posts' }, { toolName: 'get_documentation_page', input: { path: '/docs/4.x/getting-started/introduction' } }] },
+  ],
+  task: async (input) => {
+    const mcpClient = await createMCPClient({ transport: { type: 'http', url: MCP_URL } })
+    try {
+      const result = await generateText({
+        model,
+        prompt: input,
+        tools: await mcpClient.tools(),
+        // The pinned AI SDK (6.x beta) has no `maxSteps` option; the step budget
+        // is expressed through `stopWhen`, which otherwise stops after one step.
+        stopWhen: stepCountIs(5),
+      })
+      // `result.toolCalls` only covers the final step, so collect the calls of every step.
+      return result.steps.flatMap(step => step.toolCalls)
+    }
+    finally {
+      // Always release the MCP connection, even when generation throws.
+      await mcpClient.close()
+    }
+  },
+  scorers: [async ({ output, expected }) => toolCallAccuracy({ actualCalls: output, expectedCalls: expected })],
+})
diff --git a/package.json b/package.json
@@ -10,6 +10,8 @@
     "lint": "eslint . --cache",
     "typecheck": "nuxt typecheck",
     "test": "pnpm lint && pnpm typecheck",
+    "eval": "evalite",
+    "eval:ui": "evalite --ui",
     "db:generate": "nuxt hub database generate",
     "db:migrate": "nuxt hub database migrate"
   },
@@ -54,15 +56,19 @@
     "valibot": "1.1.0"
   },
   "devDependencies": {
+    "@ai-sdk/mcp": "^0.0.8",
+    "@ai-sdk/openai": "3.0.0-beta.60",
     "@iconify-json/vscode-icons": "^1.2.33",
     "@nuxt/eslint": "^1.9.0",
     "@nuxt/test-utils": "^3.20.1",
     "@nuxtjs/turnstile": "^1.1.1",
     "@testing-library/vue": "^8.1.0",
     "@types/youtube": "^0.1.2",
+    "ai": "6.0.0-beta.99",
     "capture-website": "^5.0.1",
     "drizzle-kit": "^0.31.5",
     "eslint": "^9.38.0",
+    "evalite": "1.0.0-beta.13",
     "nuxt-content-twoslash": "0.1.2",
     "shiki": "^3.14.0",
     "twoslash": "^0.3.4",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
diff --git a/server/routes/mcp.ts b/server/routes/mcp.ts