Skip to content

Commit 0cfa4ca

Browse files
committed
feat: vectorize tools
1 parent d8c6a6f commit 0cfa4ca

File tree

5 files changed

+238
-281
lines changed

5 files changed

+238
-281
lines changed

apps/workers-bindings/evals/kv_namespaces.eval.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ eachModel('$modelName', ({ model }) => {
2929
},
3030
scorers: [checkFactuality],
3131
threshold: 1,
32-
timeout: 60000, // 60 seconds
32+
timeout: 60000,
3333
})
3434
describeEval('List Cloudflare KV Namespaces', {
3535
data: async () => [
@@ -51,7 +51,7 @@ eachModel('$modelName', ({ model }) => {
5151
},
5252
scorers: [checkFactuality],
5353
threshold: 1,
54-
timeout: 60000, // 60 seconds
54+
timeout: 60000,
5555
})
5656
describeEval('Rename Cloudflare KV Namespace', {
5757
data: async () => [
@@ -74,7 +74,7 @@ eachModel('$modelName', ({ model }) => {
7474
},
7575
scorers: [checkFactuality],
7676
threshold: 1,
77-
timeout: 60000, // 60 seconds
77+
timeout: 60000,
7878
})
7979
describeEval('Get Cloudflare KV Namespace Details', {
8080
data: async () => [
@@ -96,7 +96,7 @@ eachModel('$modelName', ({ model }) => {
9696
},
9797
scorers: [checkFactuality],
9898
threshold: 1,
99-
timeout: 60000, // 60 seconds
99+
timeout: 60000,
100100
})
101101
describeEval('Delete Cloudflare KV Namespace', {
102102
data: async () => [
@@ -118,6 +118,6 @@ eachModel('$modelName', ({ model }) => {
118118
},
119119
scorers: [checkFactuality],
120120
threshold: 1,
121-
timeout: 60000, // 60 seconds
121+
timeout: 60000,
122122
})
123123
})
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
import { expect } from 'vitest'
2+
import { describeEval } from 'vitest-evals'
3+
4+
import { runTask } from '@repo/eval-tools/src/runTask'
5+
import { checkFactuality } from '@repo/eval-tools/src/scorers'
6+
import { eachModel } from '@repo/eval-tools/src/test-models'
7+
import { VECTORIZE_TOOLS } from '@repo/mcp-common/src/tools/vectorize'
8+
9+
import { initializeClient } from './utils' // Assuming utils.ts will exist here
10+
11+
const MOCK_INDEX_NAME = 'test-vectorize-index'
12+
const MOCK_INDEX_DESCRIPTION = 'A test index for evaluation'
13+
const MOCK_DIMENSIONS = 32
14+
const MOCK_METRIC = 'cosine'
15+
const MOCK_PRESET = '@cf/baai/bge-small-en-v1.5'
16+
17+
eachModel('$modelName', ({ model }) => {
18+
describeEval('Create Vectorize Index (Dimensions/Metric)', {
19+
data: async () => [
20+
{
21+
input: `Create a Vectorize index named "${MOCK_INDEX_NAME}" with ${MOCK_DIMENSIONS} dimensions using the "${MOCK_METRIC}" metric. Add description: "${MOCK_INDEX_DESCRIPTION}".`,
22+
expected: `The ${VECTORIZE_TOOLS.vectorize_index_create} tool should be called with name "${MOCK_INDEX_NAME}", config specifying ${MOCK_DIMENSIONS} dimensions and "${MOCK_METRIC}" metric, and description "${MOCK_INDEX_DESCRIPTION}".`,
23+
},
24+
],
25+
task: async (input: string) => {
26+
const client = await initializeClient()
27+
const { promptOutput, toolCalls } = await runTask(client, model, input)
28+
const toolCall = toolCalls.find(
29+
(call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_create
30+
)
31+
expect(toolCall, 'Tool vectorize_index_create was not called').toBeDefined()
32+
expect(toolCall?.args, 'Arguments did not match').toEqual(
33+
expect.objectContaining({
34+
name: MOCK_INDEX_NAME,
35+
config: expect.objectContaining({
36+
dimensions: MOCK_DIMENSIONS,
37+
metric: MOCK_METRIC,
38+
}),
39+
description: MOCK_INDEX_DESCRIPTION,
40+
})
41+
)
42+
return promptOutput
43+
},
44+
scorers: [checkFactuality],
45+
threshold: 1,
46+
timeout: 60000,
47+
})
48+
49+
// --- Test vectorize_index_create (with preset) ---
50+
describeEval('Create Vectorize Index (Preset)', {
51+
data: async () => [
52+
{
53+
input: `Create a Vectorize index named "${MOCK_INDEX_NAME}-preset" using the "${MOCK_PRESET}" preset.`,
54+
expected: `The ${VECTORIZE_TOOLS.vectorize_index_create} tool should be called with name "${MOCK_INDEX_NAME}-preset" and config specifying the preset "${MOCK_PRESET}".`,
55+
},
56+
],
57+
task: async (input: string) => {
58+
const client = await initializeClient()
59+
const { promptOutput, toolCalls } = await runTask(client, model, input)
60+
const toolCall = toolCalls.find(
61+
(call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_create
62+
)
63+
expect(toolCall, 'Tool vectorize_index_create was not called').toBeDefined()
64+
expect(toolCall?.args, 'Arguments did not match').toEqual(
65+
expect.objectContaining({
66+
name: `${MOCK_INDEX_NAME}-preset`,
67+
config: expect.objectContaining({
68+
preset: MOCK_PRESET,
69+
}),
70+
})
71+
)
72+
return promptOutput
73+
},
74+
scorers: [checkFactuality],
75+
threshold: 1,
76+
timeout: 60000,
77+
})
78+
79+
// --- Test vectorize_index_list ---
80+
describeEval('List Vectorize Indexes', {
81+
data: async () => [
82+
{
83+
input: 'List my Vectorize indexes.',
84+
expected: `The ${VECTORIZE_TOOLS.vectorize_index_list} tool should be called.`,
85+
},
86+
{
87+
input: 'Show me page 2 of my Vectorize indexes, 10 per page, ordered by name descending.',
88+
expected:
89+
`The ${VECTORIZE_TOOLS.vectorize_index_list} tool should be called with page 2, per_page 10, order name, direction desc.`,
90+
},
91+
],
92+
task: async (input: string) => {
93+
const client = await initializeClient()
94+
const { promptOutput, toolCalls } = await runTask(client, model, input)
95+
const toolCall = toolCalls.find(
96+
(call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_list
97+
)
98+
expect(toolCall, 'Tool vectorize_index_list was not called').toBeDefined()
99+
100+
// Check specific args only for the pagination case
101+
if (input.includes('page 2')) {
102+
expect(toolCall?.args, 'Pagination arguments did not match').toEqual(
103+
expect.objectContaining({
104+
page: 2,
105+
per_page: 10,
106+
order: 'name',
107+
direction: 'desc',
108+
})
109+
)
110+
}
111+
112+
return promptOutput
113+
},
114+
scorers: [checkFactuality],
115+
threshold: 1,
116+
timeout: 60000,
117+
})
118+
119+
// --- Test vectorize_index_get ---
120+
describeEval('Get Vectorize Index Details', {
121+
data: async () => [
122+
{
123+
input: `Get the details for the Vectorize index named "${MOCK_INDEX_NAME}".`,
124+
expected: `The ${VECTORIZE_TOOLS.vectorize_index_get} tool should be called with name "${MOCK_INDEX_NAME}".`,
125+
},
126+
],
127+
task: async (input: string) => {
128+
const client = await initializeClient()
129+
const { promptOutput, toolCalls } = await runTask(client, model, input)
130+
const toolCall = toolCalls.find(
131+
(call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_get
132+
)
133+
expect(toolCall, 'Tool vectorize_index_get was not called').toBeDefined()
134+
expect(toolCall?.args, 'Arguments did not match').toEqual(
135+
expect.objectContaining({
136+
name: MOCK_INDEX_NAME,
137+
})
138+
)
139+
return promptOutput
140+
},
141+
scorers: [checkFactuality],
142+
threshold: 1,
143+
timeout: 60000,
144+
})
145+
146+
describeEval('Get Vectorize Index Info', {
147+
data: async () => [
148+
{
149+
input: `Get operational info for the Vectorize index "${MOCK_INDEX_NAME}".`,
150+
expected: `The ${VECTORIZE_TOOLS.vectorize_index_info} tool should be called with name "${MOCK_INDEX_NAME}".`,
151+
},
152+
],
153+
task: async (input: string) => {
154+
const client = await initializeClient()
155+
const { promptOutput, toolCalls } = await runTask(client, model, input)
156+
const toolCall = toolCalls.find(
157+
(call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_info
158+
)
159+
expect(toolCall, 'Tool vectorize_index_info was not called').toBeDefined()
160+
expect(toolCall?.args, 'Arguments did not match').toEqual(
161+
expect.objectContaining({
162+
name: MOCK_INDEX_NAME,
163+
})
164+
)
165+
return promptOutput
166+
},
167+
scorers: [checkFactuality],
168+
threshold: 1,
169+
timeout: 60000,
170+
})
171+
172+
describeEval('Delete Vectorize Index', {
173+
data: async () => [
174+
{
175+
input: `Delete the Vectorize index named "${MOCK_INDEX_NAME}".`,
176+
expected: `The ${VECTORIZE_TOOLS.vectorize_index_delete} tool should be called with name "${MOCK_INDEX_NAME}".`,
177+
},
178+
],
179+
task: async (input: string) => {
180+
const client = await initializeClient()
181+
const { promptOutput, toolCalls } = await runTask(client, model, input)
182+
const toolCall = toolCalls.find(
183+
(call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_delete
184+
)
185+
expect(toolCall, 'Tool vectorize_index_delete was not called').toBeDefined()
186+
expect(toolCall?.args, 'Arguments did not match').toEqual(
187+
expect.objectContaining({
188+
name: MOCK_INDEX_NAME,
189+
})
190+
)
191+
return promptOutput
192+
},
193+
scorers: [checkFactuality],
194+
threshold: 1,
195+
timeout: 60000,
196+
})
197+
})

apps/workers-bindings/src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import { registerD1Tools } from '@repo/mcp-common/src/tools/d1'
1515
import { registerHyperdriveTools } from '@repo/mcp-common/src/tools/hyperdrive'
1616
import { registerKVTools } from '@repo/mcp-common/src/tools/kv_namespace'
1717
import { registerR2BucketTools } from '@repo/mcp-common/src/tools/r2_bucket'
18+
import { registerVectorizeTools } from '@repo/mcp-common/src/tools/vectorize'
1819
import { registerWorkersTools } from '@repo/mcp-common/src/tools/worker'
1920
import { MetricsTracker } from '@repo/mcp-observability'
2021

@@ -74,6 +75,7 @@ export class WorkersBindingsMCP extends McpAgent<Env, WorkersBindingsMCPState, P
7475
registerR2BucketTools(this)
7576
registerD1Tools(this)
7677
registerHyperdriveTools(this)
78+
registerVectorizeTools(this)
7779
}
7880

7981
async getActiveAccountId() {

0 commit comments

Comments
 (0)