|
| 1 | +import { expect } from 'vitest' |
| 2 | +import { describeEval } from 'vitest-evals' |
| 3 | + |
| 4 | +import { runTask } from '@repo/eval-tools/src/runTask' |
| 5 | +import { checkFactuality } from '@repo/eval-tools/src/scorers' |
| 6 | +import { eachModel } from '@repo/eval-tools/src/test-models' |
| 7 | +import { VECTORIZE_TOOLS } from '@repo/mcp-common/src/tools/vectorize' |
| 8 | + |
| 9 | +import { initializeClient } from './utils' // Assuming utils.ts will exist here |
| 10 | + |
/** Index name embedded in the create, get, info and delete prompts (the preset eval appends "-preset"). */
const MOCK_INDEX_NAME = 'test-vectorize-index'

/** Description text requested in the dimensions/metric create prompt. */
const MOCK_INDEX_DESCRIPTION = 'A test index for evaluation'

/** Dimension count requested in the dimensions/metric create prompt. */
const MOCK_DIMENSIONS = 32

/** Metric requested in the dimensions/metric create prompt. */
const MOCK_METRIC = 'cosine'

/** Preset identifier requested in the preset create prompt. */
const MOCK_PRESET = '@cf/baai/bge-small-en-v1.5'
| 16 | + |
/**
 * Vectorize tool-call evals, registered once for every model supplied by `eachModel`.
 *
 * Each eval passes a natural-language prompt to `runTask`, asserts that the expected
 * Vectorize tool is among the recorded tool calls (and, where specified, that its
 * arguments contain the expected values), and returns the prompt output. Every eval is
 * configured with the `checkFactuality` scorer, a threshold of 1 and a 60000 timeout.
 */
eachModel('$modelName', ({ model }) => {
	/**
	 * Runs `input` against the current model and asserts that `tool` was called.
	 *
	 * A new client is obtained from `initializeClient()` on every invocation.
	 *
	 * @param input - Prompt text passed to `runTask`.
	 * @param tool - Key of `VECTORIZE_TOOLS`; the call whose `toolName` equals
	 *   `VECTORIZE_TOOLS[tool]` must be present. The key itself is used in the
	 *   failure message.
	 * @param expectedArgs - When provided, the matched call's `args` must contain these
	 *   entries (compared via `expect.objectContaining`). When omitted, only the
	 *   presence of the call is asserted.
	 * @param argsMessage - Assertion message reported when the arguments do not match.
	 * @returns The `promptOutput` returned by `runTask`.
	 */
	const runExpectingTool = async (
		input: string,
		tool: keyof typeof VECTORIZE_TOOLS,
		expectedArgs?: Record<string, unknown>,
		argsMessage = 'Arguments did not match'
	) => {
		const client = await initializeClient()
		const { promptOutput, toolCalls } = await runTask(client, model, input)
		const toolCall = toolCalls.find((call) => call.toolName === VECTORIZE_TOOLS[tool])
		expect(toolCall, `Tool ${tool} was not called`).toBeDefined()
		if (expectedArgs !== undefined) {
			expect(toolCall?.args, argsMessage).toEqual(expect.objectContaining(expectedArgs))
		}
		return promptOutput
	}

	// --- Test vectorize_index_create (with dimensions/metric) ---
	describeEval('Create Vectorize Index (Dimensions/Metric)', {
		data: async () => [
			{
				input: `Create a Vectorize index named "${MOCK_INDEX_NAME}" with ${MOCK_DIMENSIONS} dimensions using the "${MOCK_METRIC}" metric. Add description: "${MOCK_INDEX_DESCRIPTION}".`,
				expected: `The ${VECTORIZE_TOOLS.vectorize_index_create} tool should be called with name "${MOCK_INDEX_NAME}", config specifying ${MOCK_DIMENSIONS} dimensions and "${MOCK_METRIC}" metric, and description "${MOCK_INDEX_DESCRIPTION}".`,
			},
		],
		task: async (input: string) =>
			runExpectingTool(input, 'vectorize_index_create', {
				name: MOCK_INDEX_NAME,
				config: expect.objectContaining({
					dimensions: MOCK_DIMENSIONS,
					metric: MOCK_METRIC,
				}),
				description: MOCK_INDEX_DESCRIPTION,
			}),
		scorers: [checkFactuality],
		threshold: 1,
		timeout: 60000,
	})

	// --- Test vectorize_index_create (with preset) ---
	describeEval('Create Vectorize Index (Preset)', {
		data: async () => [
			{
				input: `Create a Vectorize index named "${MOCK_INDEX_NAME}-preset" using the "${MOCK_PRESET}" preset.`,
				expected: `The ${VECTORIZE_TOOLS.vectorize_index_create} tool should be called with name "${MOCK_INDEX_NAME}-preset" and config specifying the preset "${MOCK_PRESET}".`,
			},
		],
		task: async (input: string) =>
			runExpectingTool(input, 'vectorize_index_create', {
				name: `${MOCK_INDEX_NAME}-preset`,
				config: expect.objectContaining({
					preset: MOCK_PRESET,
				}),
			}),
		scorers: [checkFactuality],
		threshold: 1,
		timeout: 60000,
	})

	// --- Test vectorize_index_list ---
	describeEval('List Vectorize Indexes', {
		data: async () => [
			{
				input: 'List my Vectorize indexes.',
				expected: `The ${VECTORIZE_TOOLS.vectorize_index_list} tool should be called.`,
			},
			{
				input: 'Show me page 2 of my Vectorize indexes, 10 per page, ordered by name descending.',
				// Must be a template literal: with plain quotes the `${...}` placeholder is
				// passed along as literal text instead of the tool name.
				expected: `The ${VECTORIZE_TOOLS.vectorize_index_list} tool should be called with page 2, per_page 10, order name, direction desc.`,
			},
		],
		task: async (input: string) =>
			// Both data rows share this task; only the pagination prompt (recognised by its
			// "page 2" text) has specific arguments to verify.
			input.includes('page 2')
				? runExpectingTool(
						input,
						'vectorize_index_list',
						{
							page: 2,
							per_page: 10,
							order: 'name',
							direction: 'desc',
						},
						'Pagination arguments did not match'
					)
				: runExpectingTool(input, 'vectorize_index_list'),
		scorers: [checkFactuality],
		threshold: 1,
		timeout: 60000,
	})

	// --- Test vectorize_index_get ---
	describeEval('Get Vectorize Index Details', {
		data: async () => [
			{
				input: `Get the details for the Vectorize index named "${MOCK_INDEX_NAME}".`,
				expected: `The ${VECTORIZE_TOOLS.vectorize_index_get} tool should be called with name "${MOCK_INDEX_NAME}".`,
			},
		],
		task: async (input: string) =>
			runExpectingTool(input, 'vectorize_index_get', { name: MOCK_INDEX_NAME }),
		scorers: [checkFactuality],
		threshold: 1,
		timeout: 60000,
	})

	// --- Test vectorize_index_info ---
	describeEval('Get Vectorize Index Info', {
		data: async () => [
			{
				input: `Get operational info for the Vectorize index "${MOCK_INDEX_NAME}".`,
				expected: `The ${VECTORIZE_TOOLS.vectorize_index_info} tool should be called with name "${MOCK_INDEX_NAME}".`,
			},
		],
		task: async (input: string) =>
			runExpectingTool(input, 'vectorize_index_info', { name: MOCK_INDEX_NAME }),
		scorers: [checkFactuality],
		threshold: 1,
		timeout: 60000,
	})

	// --- Test vectorize_index_delete ---
	describeEval('Delete Vectorize Index', {
		data: async () => [
			{
				input: `Delete the Vectorize index named "${MOCK_INDEX_NAME}".`,
				expected: `The ${VECTORIZE_TOOLS.vectorize_index_delete} tool should be called with name "${MOCK_INDEX_NAME}".`,
			},
		],
		task: async (input: string) =>
			runExpectingTool(input, 'vectorize_index_delete', { name: MOCK_INDEX_NAME }),
		scorers: [checkFactuality],
		threshold: 1,
		timeout: 60000,
	})
})
0 commit comments