Skip to content

Commit 88ad4ce

Browse files
authored
fix: make kv namespaces evals better (#136)
Co-authored-by: [email protected] <[email protected]>
1 parent 599bfcf commit 88ad4ce

File tree

4 files changed

+57
-24
lines changed

4 files changed

+57
-24
lines changed

apps/workers-bindings/evals/kv_namespaces.eval.ts

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { describeEval } from 'vitest-evals'
33

44
import { checkFactuality } from '@repo/eval-tools/src/scorers'
55
import { eachModel } from '@repo/eval-tools/src/test-models'
6+
import { KV_NAMESPACE_TOOLS } from '@repo/mcp-common/src/tools/kv_namespace'
67

78
import { initializeClient, runTask } from './utils' // Assuming utils.ts will exist here
89

@@ -11,14 +12,16 @@ eachModel('$modelName', ({ model }) => {
1112
data: async () => [
1213
{
1314
input: 'Create a new Cloudflare KV Namespace called "my-test-namespace".',
14-
expected: 'The kv_namespaces_create tool should be called to create a new kv namespace.',
15+
expected: `The ${KV_NAMESPACE_TOOLS.kv_namespace_create} tool should be called to create a new kv namespace.`,
1516
},
1617
],
1718
task: async (input: string) => {
1819
const client = await initializeClient(/* Pass necessary mocks/config */)
1920
const { promptOutput, toolCalls } = await runTask(client, model, input)
2021

21-
const toolCall = toolCalls.find((call) => call.toolName === 'kv_namespace_create')
22+
const toolCall = toolCalls.find(
23+
(call) => call.toolName === KV_NAMESPACE_TOOLS.kv_namespace_create
24+
)
2225
expect(toolCall, 'Tool kv_namespace_create was not called').toBeDefined()
2326

2427
return promptOutput
@@ -31,15 +34,16 @@ eachModel('$modelName', ({ model }) => {
3134
data: async () => [
3235
{
3336
input: 'List all my Cloudflare KV Namespaces.',
34-
expected:
35-
'The kv_namespaces_list tool should be called to retrieve the list of kv namespaces. There should be at least one kv namespace in the list.',
37+
expected: `The ${KV_NAMESPACE_TOOLS.kv_namespaces_list} tool should be called to retrieve the list of kv namespaces. There should be at least one kv namespace in the list.`,
3638
},
3739
],
3840
task: async (input: string) => {
3941
const client = await initializeClient(/* Pass necessary mocks/config */)
4042
const { promptOutput, toolCalls } = await runTask(client, model, input)
4143

42-
const toolCall = toolCalls.find((call) => call.toolName === 'kv_namespaces_list')
44+
const toolCall = toolCalls.find(
45+
(call) => call.toolName === KV_NAMESPACE_TOOLS.kv_namespaces_list
46+
)
4347
expect(toolCall, 'Tool kv_namespaces_list was not called').toBeDefined()
4448

4549
return promptOutput
@@ -53,14 +57,16 @@ eachModel('$modelName', ({ model }) => {
5357
{
5458
input:
5559
'Rename my Cloudflare KV Namespace called "my-test-namespace" to "my-new-test-namespace".',
56-
expected: 'The kv_namespace_update tool should be called to rename the kv namespace.',
60+
expected: `The ${KV_NAMESPACE_TOOLS.kv_namespace_update} tool should be called to rename the kv namespace.`,
5761
},
5862
],
5963
task: async (input: string) => {
6064
const client = await initializeClient(/* Pass necessary mocks/config */)
6165
const { promptOutput, toolCalls } = await runTask(client, model, input)
6266

63-
const toolCall = toolCalls.find((call) => call.toolName === 'kv_namespace_update')
67+
const toolCall = toolCalls.find(
68+
(call) => call.toolName === KV_NAMESPACE_TOOLS.kv_namespace_update
69+
)
6470
expect(toolCall, 'Tool kv_namespace_update was not called').toBeDefined()
6571

6672
return promptOutput
@@ -73,16 +79,16 @@ eachModel('$modelName', ({ model }) => {
7379
data: async () => [
7480
{
7581
input: 'Get details of my Cloudflare KV Namespace called "my-new-test-namespace".',
76-
expected:
77-
'The kv_namespace_get tool should be called to retrieve the details of the kv namespace.',
82+
expected: `The ${KV_NAMESPACE_TOOLS.kv_namespace_get} tool should be called to retrieve the details of the kv namespace.`,
7883
},
7984
],
8085
task: async (input: string) => {
8186
const client = await initializeClient(/* Pass necessary mocks/config */)
82-
const { promptOutput, toolCalls, fullResult } = await runTask(client, model, input)
87+
const { promptOutput, toolCalls } = await runTask(client, model, input)
8388

84-
console.log('fullResult', JSON.stringify(await fullResult.response, null, 2))
85-
const toolCall = toolCalls.find((call) => call.toolName === 'kv_namespace_get')
89+
const toolCall = toolCalls.find(
90+
(call) => call.toolName === KV_NAMESPACE_TOOLS.kv_namespace_get
91+
)
8692
expect(toolCall, 'Tool kv_namespace_get was not called').toBeDefined()
8793

8894
return promptOutput
@@ -95,14 +101,16 @@ eachModel('$modelName', ({ model }) => {
95101
data: async () => [
96102
{
97103
input: 'Look up the id of my only KV namespace and delete it.',
98-
expected: 'The kv_namespace_delete tool should be called to delete the kv namespace.',
104+
expected: `The ${KV_NAMESPACE_TOOLS.kv_namespace_delete} tool should be called to delete the kv namespace.`,
99105
},
100106
],
101107
task: async (input: string) => {
102108
const client = await initializeClient(/* Pass necessary mocks/config */)
103109
const { promptOutput, toolCalls } = await runTask(client, model, input)
104110

105-
const toolCall = toolCalls.find((call) => call.toolName === 'kv_namespace_delete')
111+
const toolCall = toolCalls.find(
112+
(call) => call.toolName === KV_NAMESPACE_TOOLS.kv_namespace_delete
113+
)
106114
expect(toolCall, 'Tool kv_namespace_delete was not called').toBeDefined()
107115

108116
return promptOutput

apps/workers-bindings/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"deploy:staging": "wrangler deploy --env staging",
1010
"deploy:production": "wrangler deploy --env production",
1111
"eval:dev": "start-server-and-test --expect 404 eval:server http://localhost:8977 'vitest --testTimeout=60000 --config vitest.config.evals.ts'",
12-
"eval:server": "wrangler dev --var ENVIRONMENT:test --var DEV_DISABLE_OAUTH:true --var DEV_CLOUDFLARE_EMAIL:[email protected] --inspector-port 9230",
12+
"eval:server": "wrangler dev --var ENVIRONMENT:test --var DEV_DISABLE_OAUTH:true --var DEV_CLOUDFLARE_EMAIL:[email protected] --inspector-port 9230 --port 8977",
1313
"eval:ci": "start-server-and-test --expect 404 eval:server http://localhost:8977 'vitest run --testTimeout=60000 --config vitest.config.evals.ts'",
1414
"dev": "wrangler dev",
1515
"start": "wrangler dev",

apps/workers-bindings/wrangler.jsonc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
"enabled": true
3737
},
3838
"dev": {
39-
"port": 8977
39+
"port": 8976
4040
},
4141
"vars": {
4242
"ENVIRONMENT": "development",

packages/mcp-common/src/tools/kv_namespace.ts

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,27 @@ import {
77
KvNamespaceTitleSchema,
88
} from '../types/kv_namespace'
99

10+
/**
 * Names of the KV namespace tools, keyed by the tool name itself.
 *
 * Each key maps to the identical string, so callers reference a tool name
 * through this object instead of repeating a string literal.
 *
 * Declared `as const` so every value keeps its literal type (e.g.
 * `'kv_namespace_create'` rather than `string`) and the properties are
 * readonly at the type level. This has no effect on the emitted JavaScript.
 */
export const KV_NAMESPACE_TOOLS = {
	kv_namespaces_list: 'kv_namespaces_list',
	kv_namespace_create: 'kv_namespace_create',
	kv_namespace_delete: 'kv_namespace_delete',
	kv_namespace_get: 'kv_namespace_get',
	kv_namespace_update: 'kv_namespace_update',
} as const
17+
1018
export function registerKVTools(agent: CloudflareMcpAgent) {
1119
/**
1220
* Tool to list KV namespaces.
1321
*/
1422
agent.server.tool(
15-
'kv_namespaces_list',
16-
'List all of the kv namespaces in your Cloudflare account',
23+
KV_NAMESPACE_TOOLS.kv_namespaces_list,
24+
`
25+
List all of the kv namespaces in your Cloudflare account.
26+
Use this tool when you need to list all of the kv namespaces in your Cloudflare account.
27+
Returns a list of kv namespaces with the following properties:
28+
- id: The id of the kv namespace.
29+
- title: The title of the kv namespace.
30+
`,
1731
{ params: KvNamespacesListParamsSchema.optional() },
1832
async ({ params }) => {
1933
const account_id = await agent.getActiveAccountId()
@@ -27,7 +41,11 @@ export function registerKVTools(agent: CloudflareMcpAgent) {
2741
...params,
2842
})
2943

30-
const namespaces = response.result ?? []
44+
let namespaces = response.result ?? []
45+
namespaces = namespaces.map((namespace) => ({
46+
id: namespace.id,
47+
title: namespace.title,
48+
}))
3149

3250
return {
3351
content: [
@@ -57,7 +75,7 @@ export function registerKVTools(agent: CloudflareMcpAgent) {
5775
* Tool to create a KV namespace.
5876
*/
5977
agent.server.tool(
60-
'kv_namespace_create',
78+
KV_NAMESPACE_TOOLS.kv_namespace_create,
6179
'Create a new kv namespace in your Cloudflare account',
6280
{
6381
title: KvNamespaceTitleSchema,
@@ -95,7 +113,7 @@ export function registerKVTools(agent: CloudflareMcpAgent) {
95113
* Tool to delete a KV namespace.
96114
*/
97115
agent.server.tool(
98-
'kv_namespace_delete',
116+
KV_NAMESPACE_TOOLS.kv_namespace_delete,
99117
'Delete a kv namespace in your Cloudflare account',
100118
{
101119
namespace_id: KvNamespaceIdSchema,
@@ -133,8 +151,15 @@ export function registerKVTools(agent: CloudflareMcpAgent) {
133151
* Tool to get details of a specific KV namespace.
134152
*/
135153
agent.server.tool(
136-
'kv_namespace_get',
137-
'Get details of a kv namespace in your Cloudflare account',
154+
KV_NAMESPACE_TOOLS.kv_namespace_get,
155+
`Get details of a kv namespace in your Cloudflare account.
156+
Use this tool when you need to get details of a specific kv namespace in your Cloudflare account.
157+
Returns a kv namespace with the following properties:
158+
- id: The id of the kv namespace.
159+
- title: The title of the kv namespace.
160+
- supports_url_encoding: Whether the kv namespace supports url encoding.
161+
- beta: Whether the kv namespace is in beta.
162+
`,
138163
{
139164
namespace_id: KvNamespaceIdSchema,
140165
},
@@ -171,7 +196,7 @@ export function registerKVTools(agent: CloudflareMcpAgent) {
171196
* Tool to update the title of a KV namespace.
172197
*/
173198
agent.server.tool(
174-
'kv_namespace_update',
199+
KV_NAMESPACE_TOOLS.kv_namespace_update,
175200
'Update the title of a kv namespace in your Cloudflare account',
176201
{
177202
namespace_id: KvNamespaceIdSchema,

0 commit comments

Comments
 (0)