Skip to content

Vectorize tools #147

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions apps/workers-bindings/evals/kv_namespaces.eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ eachModel('$modelName', ({ model }) => {
},
scorers: [checkFactuality],
threshold: 1,
timeout: 60000, // 60 seconds
timeout: 60000,
})
describeEval('List Cloudflare KV Namespaces', {
data: async () => [
Expand All @@ -51,7 +51,7 @@ eachModel('$modelName', ({ model }) => {
},
scorers: [checkFactuality],
threshold: 1,
timeout: 60000, // 60 seconds
timeout: 60000,
})
describeEval('Rename Cloudflare KV Namespace', {
data: async () => [
Expand All @@ -74,7 +74,7 @@ eachModel('$modelName', ({ model }) => {
},
scorers: [checkFactuality],
threshold: 1,
timeout: 60000, // 60 seconds
timeout: 60000,
})
describeEval('Get Cloudflare KV Namespace Details', {
data: async () => [
Expand All @@ -96,7 +96,7 @@ eachModel('$modelName', ({ model }) => {
},
scorers: [checkFactuality],
threshold: 1,
timeout: 60000, // 60 seconds
timeout: 60000,
})
describeEval('Delete Cloudflare KV Namespace', {
data: async () => [
Expand All @@ -118,6 +118,6 @@ eachModel('$modelName', ({ model }) => {
},
scorers: [checkFactuality],
threshold: 1,
timeout: 60000, // 60 seconds
timeout: 60000,
})
})
197 changes: 197 additions & 0 deletions apps/workers-bindings/evals/vectorize.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
import { expect } from 'vitest'
import { describeEval } from 'vitest-evals'

import { runTask } from '@repo/eval-tools/src/runTask'
import { checkFactuality } from '@repo/eval-tools/src/scorers'
import { eachModel } from '@repo/eval-tools/src/test-models'
import { VECTORIZE_TOOLS } from '@repo/mcp-common/src/tools/vectorize'

import { initializeClient } from './utils' // Assuming utils.ts will exist here

// Shared fixture values: interpolated into the eval prompts in this file and
// matched against the arguments of the resulting tool calls.

// Index name used by the create, get, info and delete evals.
const MOCK_INDEX_NAME = 'test-vectorize-index'
// Description requested in the dimensions/metric create eval.
const MOCK_INDEX_DESCRIPTION = 'A test index for evaluation'
// Dimension count requested in the dimensions/metric create eval.
const MOCK_DIMENSIONS = 32
// Metric requested in the dimensions/metric create eval.
const MOCK_METRIC = 'cosine'
// Preset identifier requested in the preset create eval.
const MOCK_PRESET = '@cf/baai/bge-small-en-v1.5'

eachModel('$modelName', ({ model }) => {
/**
 * Eval: create an index from an explicit dimension count and metric.
 *
 * The task asserts that the create tool was called with the requested name,
 * config (dimensions + metric) and description, then returns the prompt
 * output so the configured scorer can evaluate it.
 */
describeEval('Create Vectorize Index (Dimensions/Metric)', {
  data: async () => {
    const input = `Create a Vectorize index named "${MOCK_INDEX_NAME}" with ${MOCK_DIMENSIONS} dimensions using the "${MOCK_METRIC}" metric. Add description: "${MOCK_INDEX_DESCRIPTION}".`
    const expected = `The ${VECTORIZE_TOOLS.vectorize_index_create} tool should be called with name "${MOCK_INDEX_NAME}", config specifying ${MOCK_DIMENSIONS} dimensions and "${MOCK_METRIC}" metric, and description "${MOCK_INDEX_DESCRIPTION}".`
    return [{ input, expected }]
  },
  task: async (input: string) => {
    const mcpClient = await initializeClient()
    const result = await runTask(mcpClient, model, input)

    // Only the first call to the create tool is inspected.
    const createCall = result.toolCalls.find(
      (call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_create
    )
    expect(createCall, 'Tool vectorize_index_create was not called').toBeDefined()

    // Partial matchers: extra arguments supplied by the model are tolerated.
    const wantedConfig = expect.objectContaining({
      dimensions: MOCK_DIMENSIONS,
      metric: MOCK_METRIC,
    })
    const wantedArgs = expect.objectContaining({
      name: MOCK_INDEX_NAME,
      config: wantedConfig,
      description: MOCK_INDEX_DESCRIPTION,
    })
    expect(createCall?.args, 'Arguments did not match').toEqual(wantedArgs)

    return result.promptOutput
  },
  scorers: [checkFactuality],
  threshold: 1,
  timeout: 60000,
})

// --- Test vectorize_index_create (with preset) ---
/**
 * Eval: create an index from a preset rather than explicit dimensions/metric.
 *
 * The task asserts that the create tool was called with the "-preset" suffixed
 * index name and a config carrying the requested preset, then returns the
 * prompt output for scoring.
 */
describeEval('Create Vectorize Index (Preset)', {
  data: async () => {
    const input = `Create a Vectorize index named "${MOCK_INDEX_NAME}-preset" using the "${MOCK_PRESET}" preset.`
    const expected = `The ${VECTORIZE_TOOLS.vectorize_index_create} tool should be called with name "${MOCK_INDEX_NAME}-preset" and config specifying the preset "${MOCK_PRESET}".`
    return [{ input, expected }]
  },
  task: async (input: string) => {
    const mcpClient = await initializeClient()
    const result = await runTask(mcpClient, model, input)

    // Only the first call to the create tool is inspected.
    const createCall = result.toolCalls.find(
      (call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_create
    )
    expect(createCall, 'Tool vectorize_index_create was not called').toBeDefined()

    // Partial matchers: extra arguments supplied by the model are tolerated.
    const presetIndexName = `${MOCK_INDEX_NAME}-preset`
    const wantedArgs = expect.objectContaining({
      name: presetIndexName,
      config: expect.objectContaining({ preset: MOCK_PRESET }),
    })
    expect(createCall?.args, 'Arguments did not match').toEqual(wantedArgs)

    return result.promptOutput
  },
  scorers: [checkFactuality],
  threshold: 1,
  timeout: 60000,
})

// --- Test vectorize_index_list ---
/**
 * Eval: list Vectorize indexes, with and without pagination options.
 *
 * Both prompts must trigger the list tool. For the paginated prompt the task
 * additionally requires the page, per_page, order and direction arguments to
 * match the request. The prompt output is returned for scoring.
 */
describeEval('List Vectorize Indexes', {
  data: async () => [
    {
      input: 'List my Vectorize indexes.',
      expected: `The ${VECTORIZE_TOOLS.vectorize_index_list} tool should be called.`,
    },
    {
      input: 'Show me page 2 of my Vectorize indexes, 10 per page, ordered by name descending.',
      // Must be a template literal: inside ordinary quotes the ${...}
      // placeholder is not interpolated, so the expected text would contain
      // the raw placeholder instead of the tool name.
      expected: `The ${VECTORIZE_TOOLS.vectorize_index_list} tool should be called with page 2, per_page 10, order name, direction desc.`,
    },
  ],
  task: async (input: string) => {
    const client = await initializeClient()
    const { promptOutput, toolCalls } = await runTask(client, model, input)

    // Only the first call to the list tool is inspected.
    const toolCall = toolCalls.find(
      (call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_list
    )
    expect(toolCall, 'Tool vectorize_index_list was not called').toBeDefined()

    // The two data rows share this task; the paginated row is recognised by
    // the "page 2" phrase in its prompt, and only that row has its arguments
    // checked.
    if (input.includes('page 2')) {
      expect(toolCall?.args, 'Pagination arguments did not match').toEqual(
        expect.objectContaining({
          page: 2,
          per_page: 10,
          order: 'name',
          direction: 'desc',
        })
      )
    }

    return promptOutput
  },
  scorers: [checkFactuality],
  threshold: 1,
  timeout: 60000,
})

// --- Test vectorize_index_get ---
/**
 * Eval: fetch the details of a single index by name.
 *
 * The task asserts that the get tool was called with the fixture index name,
 * then returns the prompt output for scoring.
 */
describeEval('Get Vectorize Index Details', {
  data: async () => {
    const input = `Get the details for the Vectorize index named "${MOCK_INDEX_NAME}".`
    const expected = `The ${VECTORIZE_TOOLS.vectorize_index_get} tool should be called with name "${MOCK_INDEX_NAME}".`
    return [{ input, expected }]
  },
  task: async (input: string) => {
    const mcpClient = await initializeClient()
    const result = await runTask(mcpClient, model, input)

    // Only the first call to the get tool is inspected.
    const getCall = result.toolCalls.find(
      (call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_get
    )
    expect(getCall, 'Tool vectorize_index_get was not called').toBeDefined()

    // Partial matcher: extra arguments supplied by the model are tolerated.
    const wantedArgs = expect.objectContaining({ name: MOCK_INDEX_NAME })
    expect(getCall?.args, 'Arguments did not match').toEqual(wantedArgs)

    return result.promptOutput
  },
  scorers: [checkFactuality],
  threshold: 1,
  timeout: 60000,
})

/**
 * Eval: request operational info for a single index by name.
 *
 * The task asserts that the info tool was called with the fixture index name,
 * then returns the prompt output for scoring.
 */
describeEval('Get Vectorize Index Info', {
  data: async () => {
    const input = `Get operational info for the Vectorize index "${MOCK_INDEX_NAME}".`
    const expected = `The ${VECTORIZE_TOOLS.vectorize_index_info} tool should be called with name "${MOCK_INDEX_NAME}".`
    return [{ input, expected }]
  },
  task: async (input: string) => {
    const mcpClient = await initializeClient()
    const result = await runTask(mcpClient, model, input)

    // Only the first call to the info tool is inspected.
    const infoCall = result.toolCalls.find(
      (call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_info
    )
    expect(infoCall, 'Tool vectorize_index_info was not called').toBeDefined()

    // Partial matcher: extra arguments supplied by the model are tolerated.
    const wantedArgs = expect.objectContaining({ name: MOCK_INDEX_NAME })
    expect(infoCall?.args, 'Arguments did not match').toEqual(wantedArgs)

    return result.promptOutput
  },
  scorers: [checkFactuality],
  threshold: 1,
  timeout: 60000,
})

/**
 * Eval: delete a single index by name.
 *
 * The task asserts that the delete tool was called with the fixture index
 * name, then returns the prompt output for scoring.
 */
describeEval('Delete Vectorize Index', {
  data: async () => {
    const input = `Delete the Vectorize index named "${MOCK_INDEX_NAME}".`
    const expected = `The ${VECTORIZE_TOOLS.vectorize_index_delete} tool should be called with name "${MOCK_INDEX_NAME}".`
    return [{ input, expected }]
  },
  task: async (input: string) => {
    const mcpClient = await initializeClient()
    const result = await runTask(mcpClient, model, input)

    // Only the first call to the delete tool is inspected.
    const deleteCall = result.toolCalls.find(
      (call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_delete
    )
    expect(deleteCall, 'Tool vectorize_index_delete was not called').toBeDefined()

    // Partial matcher: extra arguments supplied by the model are tolerated.
    const wantedArgs = expect.objectContaining({ name: MOCK_INDEX_NAME })
    expect(deleteCall?.args, 'Arguments did not match').toEqual(wantedArgs)

    return result.promptOutput
  },
  scorers: [checkFactuality],
  threshold: 1,
  timeout: 60000,
})
})
2 changes: 2 additions & 0 deletions apps/workers-bindings/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import { registerD1Tools } from '@repo/mcp-common/src/tools/d1'
import { registerHyperdriveTools } from '@repo/mcp-common/src/tools/hyperdrive'
import { registerKVTools } from '@repo/mcp-common/src/tools/kv_namespace'
import { registerR2BucketTools } from '@repo/mcp-common/src/tools/r2_bucket'
import { registerVectorizeTools } from '@repo/mcp-common/src/tools/vectorize'
import { registerWorkersTools } from '@repo/mcp-common/src/tools/worker'
import { MetricsTracker } from '@repo/mcp-observability'

Expand Down Expand Up @@ -74,6 +75,7 @@ export class WorkersBindingsMCP extends McpAgent<Env, WorkersBindingsMCPState, P
registerR2BucketTools(this)
registerD1Tools(this)
registerHyperdriveTools(this)
registerVectorizeTools(this)
}

async getActiveAccountId() {
Expand Down
Loading
Loading