|
| 1 | +import { expect } from 'vitest' |
| 2 | +import { describeEval } from 'vitest-evals' |
| 3 | + |
| 4 | +import { checkFactuality } from '@repo/eval-tools/src/scorers' |
| 5 | +import { eachModel } from '@repo/eval-tools/src/test-models' |
| 6 | + |
| 7 | +import { initializeClient, runTask } from './utils' // Assuming utils.ts will exist here |
| 8 | + |
| 9 | +eachModel('$modelName', ({ model }) => { |
// Eval: prompt the model to create a KV namespace and require that a tool call
// named `kv_namespace_create` was made. The task returns the prompt output,
// which is scored with `checkFactuality` (listed under `scorers`).
describeEval('Create Cloudflare KV Namespace', {
  data: async () => [
    {
      input: 'Create a new Cloudflare KV Namespace called "my-test-namespace".',
      // Must name the same tool the task asserts on (`kv_namespace_create`);
      // the singular form also matches the update/get/delete tool names.
      expected: 'The kv_namespace_create tool should be called to create a new kv namespace.',
    },
  ],
  task: async (input: string) => {
    const client = await initializeClient(/* Pass necessary mocks/config */)
    const { promptOutput, toolCalls } = await runTask(client, model, input)

    // Fail the eval outright if the expected tool was never invoked.
    const toolCall = toolCalls.find((call) => call.toolName === 'kv_namespace_create')
    expect(toolCall, 'Tool kv_namespace_create was not called').toBeDefined()

    return promptOutput
  },
  scorers: [checkFactuality],
  // NOTE(review): presumably requires a perfect scorer result — confirm against vitest-evals.
  threshold: 1,
  timeout: 60000, // 60 seconds
})
// Eval: prompt the model to list KV namespaces and require that a tool call
// named `kv_namespaces_list` was made. The task returns the prompt output,
// which is scored with `checkFactuality` (listed under `scorers`).
describeEval('List Cloudflare KV Namespaces', {
  data: async () => [
    {
      input: 'List all my Cloudflare KV Namespaces.',
      expected:
        'The kv_namespaces_list tool should be called to retrieve the list of kv namespaces. There should be at least one kv namespace in the list.',
    },
  ],
  task: async (input: string) => {
    const client = await initializeClient(/* Pass necessary mocks/config */)
    // Only the prompt output and the recorded tool calls are needed here.
    const { promptOutput, toolCalls } = await runTask(client, model, input)

    // Fail the eval outright if the expected tool was never invoked.
    const toolCall = toolCalls.find((call) => call.toolName === 'kv_namespaces_list')
    expect(toolCall, 'Tool kv_namespaces_list was not called').toBeDefined()

    return promptOutput
  },
  scorers: [checkFactuality],
  // NOTE(review): presumably requires a perfect scorer result — confirm against vitest-evals.
  threshold: 1,
  timeout: 60000, // 60 seconds
})
// Eval: prompt the model to rename a KV namespace and require that a tool call
// named `kv_namespace_update` was made. The task returns the prompt output,
// which is scored with `checkFactuality` (listed under `scorers`).
describeEval('Rename Cloudflare KV Namespace', {
  data: async () => [
    {
      input:
        'Rename my Cloudflare KV Namespace called "my-test-namespace" to "my-new-test-namespace".',
      expected: 'The kv_namespace_update tool should be called to rename the kv namespace.',
    },
  ],
  task: async (input: string) => {
    const client = await initializeClient(/* Pass necessary mocks/config */)
    // Only the prompt output and the recorded tool calls are needed here.
    const { promptOutput, toolCalls } = await runTask(client, model, input)

    // Fail the eval outright if the expected tool was never invoked.
    const toolCall = toolCalls.find((call) => call.toolName === 'kv_namespace_update')
    expect(toolCall, 'Tool kv_namespace_update was not called').toBeDefined()

    return promptOutput
  },
  scorers: [checkFactuality],
  // NOTE(review): presumably requires a perfect scorer result — confirm against vitest-evals.
  threshold: 1,
  timeout: 60000, // 60 seconds
})
// Eval: prompt the model to fetch a KV namespace's details and require that a
// tool call named `kv_namespace_get` was made. The task returns the prompt
// output, which is scored with `checkFactuality` (listed under `scorers`).
describeEval('Get Cloudflare KV Namespace Details', {
  data: async () => [
    {
      input: 'Get details of my Cloudflare KV Namespace called "my-new-test-namespace".',
      expected:
        'The kv_namespace_get tool should be called to retrieve the details of the kv namespace.',
    },
  ],
  task: async (input: string) => {
    const client = await initializeClient(/* Pass necessary mocks/config */)
    // The debug dump of the full response was dropped: it printed the whole
    // response JSON on every run and none of the sibling evals log anything.
    const { promptOutput, toolCalls } = await runTask(client, model, input)

    // Fail the eval outright if the expected tool was never invoked.
    const toolCall = toolCalls.find((call) => call.toolName === 'kv_namespace_get')
    expect(toolCall, 'Tool kv_namespace_get was not called').toBeDefined()

    return promptOutput
  },
  scorers: [checkFactuality],
  // NOTE(review): presumably requires a perfect scorer result — confirm against vitest-evals.
  threshold: 1,
  timeout: 60000, // 60 seconds
})
// Eval: prompt the model to look up and delete the only KV namespace and
// require that a tool call named `kv_namespace_delete` was made. The task
// returns the prompt output, which is scored with `checkFactuality` (listed
// under `scorers`).
describeEval('Delete Cloudflare KV Namespace', {
  data: async () => [
    {
      input: 'Look up the id of my only KV namespace and delete it.',
      expected: 'The kv_namespace_delete tool should be called to delete the kv namespace.',
    },
  ],
  task: async (input: string) => {
    const client = await initializeClient(/* Pass necessary mocks/config */)
    // Only the prompt output and the recorded tool calls are needed here.
    const { promptOutput, toolCalls } = await runTask(client, model, input)

    // Fail the eval outright if the expected tool was never invoked.
    const toolCall = toolCalls.find((call) => call.toolName === 'kv_namespace_delete')
    expect(toolCall, 'Tool kv_namespace_delete was not called').toBeDefined()

    return promptOutput
  },
  scorers: [checkFactuality],
  // NOTE(review): presumably requires a perfect scorer result — confirm against vitest-evals.
  threshold: 1,
  timeout: 60000, // 60 seconds
})
| 114 | +}) |
0 commit comments