|
| 1 | +import { expect } from 'vitest' |
| 2 | +import { describeEval } from 'vitest-evals' |
| 3 | + |
| 4 | +import { checkFactuality } from '@repo/eval-tools/src/scorers' |
| 5 | +import { eachModel } from '@repo/eval-tools/src/test-models' |
| 6 | + |
| 7 | +import { initializeClient, runTask } from './utils' // Assuming utils.ts will exist here |
| 8 | + |
// Prompt for the "list namespaces" case. It is shared by the dataset and the
// task so the explicit tool-call check cannot drift out of sync with the data.
const LIST_NAMESPACES_PROMPT = 'List all my Cloudflare KV Namespaces.'

/**
 * KV namespace tool evaluations, registered once per model yielded by `eachModel`.
 *
 * For every case, `input` is sent to the model through `runTask` and the
 * returned `promptOutput` is handed to the `checkFactuality` scorer together
 * with the case's `expected` description. The list case additionally asserts
 * that the `kv_namespaces_list` tool was actually invoked.
 *
 * NOTE(review): the prompts read like a sequence (create -> list -> rename ->
 * get -> delete) and presumably depend on state left by the earlier cases —
 * confirm that the harness runs them in order against the same account.
 *
 * NOTE(review): the `expected` strings mix `kv_namespaces_*` and
 * `kv_namespace_*` tool-name prefixes — verify them against the tool names the
 * server actually registers.
 */
eachModel('$modelName', ({ model }) => {
  describeEval('KV Namespaces Tool Evaluations', {
    data: async () => [
      {
        input: 'Create a new Cloudflare KV Namespace called "my-test-namespace".',
        expected: 'The kv_namespaces_create tool should be called to create a new kv namespace.',
      },
      {
        input: LIST_NAMESPACES_PROMPT,
        expected: 'The kv_namespaces_list tool should be called to retrieve the list of kv namespaces. There should be at least one kv namespace in the list.',
      },
      {
        input: 'Rename my Cloudflare KV Namespace called "my-test-namespace" to "my-new-test-namespace".',
        expected: 'The kv_namespace_update tool should be called to rename the kv namespace.',
      },
      {
        input: 'Get details of my Cloudflare KV Namespace called "my-new-test-namespace".',
        expected: 'The kv_namespace_get tool should be called to retrieve the details of the kv namespace.',
      },
      {
        input: 'Look up the id of my only KV namespace and delete it.',
        expected: 'The kv_namespace_delete tool should be called to delete the kv namespace.',
      },
    ],
    task: async (input: string) => {
      const client = await initializeClient(/* Pass necessary mocks/config */)
      const { promptOutput, toolCalls } = await runTask(client, model, input)

      // Only the list case has a hard tool-call assertion; the other cases
      // are judged solely by the scorer on the text output.
      if (input === LIST_NAMESPACES_PROMPT) {
        const toolCall = toolCalls.find((call) => call.toolName === 'kv_namespaces_list')
        expect(toolCall, 'Tool kv_namespaces_list was not called').toBeDefined()
      }

      return promptOutput
    },
    scorers: [checkFactuality],
    threshold: 1,
    timeout: 60000, // 60 seconds
  })
})
0 commit comments