cloudflare · deloreyj · Apr 30, 2025 · Apr 30, 2025
diff --git a/apps/workers-bindings/evals/kv_namespaces.eval.ts b/apps/workers-bindings/evals/kv_namespaces.eval.ts
@@ -29,7 +29,7 @@ eachModel('$modelName', ({ model }) => {
 		},
 		scorers: [checkFactuality],
 		threshold: 1,
-		timeout: 60000, // 60 seconds
+		timeout: 60000,
 	})
 	describeEval('List Cloudflare KV Namespaces', {
 		data: async () => [
@@ -51,7 +51,7 @@ eachModel('$modelName', ({ model }) => {
 		},
 		scorers: [checkFactuality],
 		threshold: 1,
-		timeout: 60000, // 60 seconds
+		timeout: 60000,
 	})
 	describeEval('Rename Cloudflare KV Namespace', {
 		data: async () => [
@@ -74,7 +74,7 @@ eachModel('$modelName', ({ model }) => {
 		},
 		scorers: [checkFactuality],
 		threshold: 1,
-		timeout: 60000, // 60 seconds
+		timeout: 60000,
 	})
 	describeEval('Get Cloudflare KV Namespace Details', {
 		data: async () => [
@@ -96,7 +96,7 @@ eachModel('$modelName', ({ model }) => {
 		},
 		scorers: [checkFactuality],
 		threshold: 1,
-		timeout: 60000, // 60 seconds
+		timeout: 60000,
 	})
 	describeEval('Delete Cloudflare KV Namespace', {
 		data: async () => [
@@ -118,6 +118,6 @@ eachModel('$modelName', ({ model }) => {
 		},
 		scorers: [checkFactuality],
 		threshold: 1,
-		timeout: 60000, // 60 seconds
+		timeout: 60000,
 	})
 })
diff --git a/apps/workers-bindings/evals/vectorize.eval.ts b/apps/workers-bindings/evals/vectorize.eval.ts
@@ -0,0 +1,197 @@
+import { expect } from 'vitest'
+import { describeEval } from 'vitest-evals'
+
+import { runTask } from '@repo/eval-tools/src/runTask'
+import { checkFactuality } from '@repo/eval-tools/src/scorers'
+import { eachModel } from '@repo/eval-tools/src/test-models'
+import { VECTORIZE_TOOLS } from '@repo/mcp-common/src/tools/vectorize'
+
+import { initializeClient } from './utils' // NOTE(review): verify ./utils.ts exists and exports initializeClient
+
+const MOCK_INDEX_NAME = 'test-vectorize-index'
+const MOCK_INDEX_DESCRIPTION = 'A test index for evaluation'
+const MOCK_DIMENSIONS = 32
+const MOCK_METRIC = 'cosine'
+const MOCK_PRESET = '@cf/baai/bge-small-en-v1.5'
+
+eachModel('$modelName', ({ model }) => {
+	// --- Test vectorize_index_create (explicit dimensions and metric) ---
+	describeEval('Create Vectorize Index (Dimensions/Metric)', {
+		data: async () => [
+			{
+				input: `Create a Vectorize index named "${MOCK_INDEX_NAME}" with ${MOCK_DIMENSIONS} dimensions using the "${MOCK_METRIC}" metric. Add description: "${MOCK_INDEX_DESCRIPTION}".`,
+				expected: `The ${VECTORIZE_TOOLS.vectorize_index_create} tool should be called with name "${MOCK_INDEX_NAME}", config specifying ${MOCK_DIMENSIONS} dimensions and "${MOCK_METRIC}" metric, and description "${MOCK_INDEX_DESCRIPTION}".`,
+			},
+		],
+		task: async (input: string) => {
+			const mcpClient = await initializeClient()
+			const { promptOutput, toolCalls } = await runTask(mcpClient, model, input)
+			const wantedTool = VECTORIZE_TOOLS.vectorize_index_create
+			const createCall = toolCalls.find((entry) => entry.toolName === wantedTool)
+			expect(createCall, 'Tool vectorize_index_create was not called').toBeDefined()
+			// objectContaining is a partial match, so extra keys in the arguments are tolerated.
+			const expectedArgs = expect.objectContaining({
+				name: MOCK_INDEX_NAME,
+				config: expect.objectContaining({
+					dimensions: MOCK_DIMENSIONS,
+					metric: MOCK_METRIC,
+				}),
+				description: MOCK_INDEX_DESCRIPTION,
+			})
+			expect(createCall?.args, 'Arguments did not match').toEqual(expectedArgs)
+			return promptOutput
+		},
+		scorers: [checkFactuality],
+		threshold: 1,
+		timeout: 60000,
+	})
+
+	// --- Test vectorize_index_create (preset-based config) ---
+	describeEval('Create Vectorize Index (Preset)', {
+		data: async () => [
+			{
+				input: `Create a Vectorize index named "${MOCK_INDEX_NAME}-preset" using the "${MOCK_PRESET}" preset.`,
+				expected: `The ${VECTORIZE_TOOLS.vectorize_index_create} tool should be called with name "${MOCK_INDEX_NAME}-preset" and config specifying the preset "${MOCK_PRESET}".`,
+			},
+		],
+		task: async (input: string) => {
+			const mcpClient = await initializeClient()
+			const { promptOutput, toolCalls } = await runTask(mcpClient, model, input)
+			const wantedTool = VECTORIZE_TOOLS.vectorize_index_create
+			const createCall = toolCalls.find((entry) => entry.toolName === wantedTool)
+			expect(createCall, 'Tool vectorize_index_create was not called').toBeDefined()
+			// Only name and preset are pinned; objectContaining ignores any other keys.
+			const presetIndexName = `${MOCK_INDEX_NAME}-preset`
+			const expectedArgs = expect.objectContaining({
+				name: presetIndexName,
+				config: expect.objectContaining({
+					preset: MOCK_PRESET,
+				}),
+			})
+			expect(createCall?.args, 'Arguments did not match').toEqual(expectedArgs)
+			return promptOutput
+		},
+		scorers: [checkFactuality],
+		threshold: 1,
+		timeout: 60000,
+	})
+
+	// --- Test vectorize_index_list (plain listing and paginated/sorted listing) ---
+	describeEval('List Vectorize Indexes', {
+		data: async () => [
+			{
+				input: 'List my Vectorize indexes.',
+				expected: `The ${VECTORIZE_TOOLS.vectorize_index_list} tool should be called.`,
+			},
+			{
+				input: 'Show me page 2 of my Vectorize indexes, 10 per page, ordered by name descending.',
+				// Backticks are required so the tool name is interpolated instead of emitted as a literal "${...}".
+				expected: `The ${VECTORIZE_TOOLS.vectorize_index_list} tool should be called with page 2, per_page 10, order name, direction desc.`,
+			},
+		],
+		task: async (input: string) => {
+			const client = await initializeClient()
+			const { promptOutput, toolCalls } = await runTask(client, model, input)
+			const toolCall = toolCalls.find(
+				(call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_list
+			)
+			expect(toolCall, 'Tool vectorize_index_list was not called').toBeDefined()
+
+			// The plain-listing case has no arguments to pin; only the pagination prompt is checked.
+			if (input.includes('page 2')) {
+				expect(toolCall?.args, 'Pagination arguments did not match').toEqual(
+					expect.objectContaining({
+						page: 2,
+						per_page: 10,
+						order: 'name',
+						direction: 'desc',
+					})
+				)
+			}
+
+			return promptOutput
+		},
+		scorers: [checkFactuality],
+		threshold: 1,
+		timeout: 60000,
+	})
+
+	// --- Test vectorize_index_get (lookup by index name) ---
+	describeEval('Get Vectorize Index Details', {
+		data: async () => [
+			{
+				input: `Get the details for the Vectorize index named "${MOCK_INDEX_NAME}".`,
+				expected: `The ${VECTORIZE_TOOLS.vectorize_index_get} tool should be called with name "${MOCK_INDEX_NAME}".`,
+			},
+		],
+		task: async (input: string) => {
+			const mcpClient = await initializeClient()
+			const { promptOutput, toolCalls } = await runTask(mcpClient, model, input)
+			const wantedTool = VECTORIZE_TOOLS.vectorize_index_get
+			const getCall = toolCalls.find((entry) => entry.toolName === wantedTool)
+			expect(getCall, 'Tool vectorize_index_get was not called').toBeDefined()
+			// The call must target the mock index; objectContaining permits additional keys.
+			const expectedArgs = expect.objectContaining({
+				name: MOCK_INDEX_NAME,
+			})
+			expect(getCall?.args, 'Arguments did not match').toEqual(expectedArgs)
+
+			return promptOutput
+		},
+		scorers: [checkFactuality],
+		threshold: 1,
+		timeout: 60000,
+	})
+
+	// --- Test vectorize_index_info ---
+	describeEval('Get Vectorize Index Info', {
+		data: async () => [
+			{
+				input: `Get operational info for the Vectorize index "${MOCK_INDEX_NAME}".`,
+				expected: `The ${VECTORIZE_TOOLS.vectorize_index_info} tool should be called with name "${MOCK_INDEX_NAME}".`,
+			},
+		],
+		task: async (input: string) => {
+			const mcpClient = await initializeClient()
+			const { promptOutput, toolCalls } = await runTask(mcpClient, model, input)
+			const wantedTool = VECTORIZE_TOOLS.vectorize_index_info
+			const infoCall = toolCalls.find((entry) => entry.toolName === wantedTool)
+			expect(infoCall, 'Tool vectorize_index_info was not called').toBeDefined()
+			// Only the index name is pinned; objectContaining permits additional keys.
+			const expectedArgs = expect.objectContaining({
+				name: MOCK_INDEX_NAME,
+			})
+			expect(infoCall?.args, 'Arguments did not match').toEqual(expectedArgs)
+			return promptOutput
+		},
+		scorers: [checkFactuality],
+		threshold: 1,
+		timeout: 60000,
+	})
+
+	// --- Test vectorize_index_delete ---
+	describeEval('Delete Vectorize Index', {
+		data: async () => [
+			{
+				input: `Delete the Vectorize index named "${MOCK_INDEX_NAME}".`,
+				expected: `The ${VECTORIZE_TOOLS.vectorize_index_delete} tool should be called with name "${MOCK_INDEX_NAME}".`,
+			},
+		],
+		task: async (input: string) => {
+			const mcpClient = await initializeClient()
+			const { promptOutput, toolCalls } = await runTask(mcpClient, model, input)
+			const wantedTool = VECTORIZE_TOOLS.vectorize_index_delete
+			const deleteCall = toolCalls.find((entry) => entry.toolName === wantedTool)
+			expect(deleteCall, 'Tool vectorize_index_delete was not called').toBeDefined()
+			// Only the index name is pinned; objectContaining permits additional keys.
+			const expectedArgs = expect.objectContaining({
+				name: MOCK_INDEX_NAME,
+			})
+			expect(deleteCall?.args, 'Arguments did not match').toEqual(expectedArgs)
+			return promptOutput
+		},
+		scorers: [checkFactuality],
+		threshold: 1,
+		timeout: 60000,
+	})
+})
diff --git a/apps/workers-bindings/src/index.ts b/apps/workers-bindings/src/index.ts
@@ -15,6 +15,7 @@ import { registerD1Tools } from '@repo/mcp-common/src/tools/d1'
 import { registerHyperdriveTools } from '@repo/mcp-common/src/tools/hyperdrive'
 import { registerKVTools } from '@repo/mcp-common/src/tools/kv_namespace'
 import { registerR2BucketTools } from '@repo/mcp-common/src/tools/r2_bucket'
+import { registerVectorizeTools } from '@repo/mcp-common/src/tools/vectorize'
 import { registerWorkersTools } from '@repo/mcp-common/src/tools/worker'
 import { MetricsTracker } from '@repo/mcp-observability'
 
@@ -74,6 +75,7 @@ export class WorkersBindingsMCP extends McpAgent<Env, WorkersBindingsMCPState, P
 		registerR2BucketTools(this)
 		registerD1Tools(this)
 		registerHyperdriveTools(this)
+		registerVectorizeTools(this)
 	}
 
 	async getActiveAccountId() {