Skip to content

Commit 71637e4

Browse files
committed
feat: add hyperdrive bindings and evals
1 parent 895162f commit 71637e4

File tree

20 files changed

+739
-92
lines changed

20 files changed

+739
-92
lines changed

apps/docs-autorag/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
"@modelcontextprotocol/sdk": "1.10.2",
1818
"@repo/mcp-common": "workspace:*",
1919
"@repo/mcp-observability": "workspace:*",
20-
"agents": "0.0.67",
20+
"agents": "0.0.75",
2121
"cloudflare": "4.2.0",
2222
"hono": "4.7.6",
2323
"mime": "4.0.6",

apps/radar/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
"@modelcontextprotocol/sdk": "1.10.2",
1818
"@repo/mcp-common": "workspace:*",
1919
"@repo/mcp-observability": "workspace:*",
20-
"agents": "0.0.67",
20+
"agents": "0.0.75",
2121
"cloudflare": "4.2.0",
2222
"hono": "4.7.6",
2323
"zod": "3.24.2"

apps/sandbox-container/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
"@repo/mcp-common": "workspace:*",
2828
"@repo/mcp-observability": "workspace:*",
2929
"@types/node": "22.14.1",
30-
"agents": "0.0.67",
30+
"agents": "0.0.75",
3131
"cron-schedule": "5.0.4",
3232
"esbuild": "0.25.1",
3333
"hono": "4.7.6",
@@ -40,7 +40,7 @@
4040
"devDependencies": {
4141
"@cloudflare/vitest-pool-workers": "0.8.14",
4242
"@types/mock-fs": "4.13.4",
43-
"ai": "4.3.6",
43+
"ai": "4.3.10",
4444
"concurrently": "9.1.2",
4545
"mock-fs": "5.5.0",
4646
"start-server-and-test": "2.0.11",

apps/sandbox-container/server/index.ts

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,4 @@ export default {
7676
clientRegistrationEndpoint: '/register',
7777
}).fetch(req, env, ctx)
7878
},
79-
} /*
80-
81-
*/
79+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import { expect } from 'vitest'
2+
import { describeEval } from 'vitest-evals'
3+
4+
import { checkFactuality } from '@repo/eval-tools/src/scorers'
5+
import { eachModel } from '@repo/eval-tools/src/test-models'
6+
7+
import { initializeClient, runTask } from './utils' // Assuming utils.ts will exist here
8+
9+
// Define a mock account ID for testing
10+
const MOCK_ACCOUNT_ID = 'mock-account-12345'
11+
12+
// Evaluates, for every configured model, whether account-related prompts lead
// the model to call the expected MCP tools. Each task's transcript is returned
// so the factuality scorer can compare it against the `expected` description.
eachModel('$modelName', ({ model }) => {
	describeEval('Account Tool Evaluations', {
		data: async () => [
			{
				input: 'List all my Cloudflare accounts.',
				expected: 'The accounts_list tool should be called to retrieve the list of accounts.',
			},
			{
				input: `Set my active Cloudflare account to ${MOCK_ACCOUNT_ID}.`,
				expected: `The set_active_account tool should be called with the account ID ${MOCK_ACCOUNT_ID}.`,
			},
		],
		task: async (input: string) => {
			const client = await initializeClient()
			// Only the transcript and the recorded tool calls are needed here.
			const { promptOutput, toolCalls } = await runTask(client, model, input)

			// Hard assertions on tool usage, selected by which prompt is being run.
			if (input.includes('List all my Cloudflare accounts')) {
				const toolCall = toolCalls.find((call) => call.toolName === 'accounts_list')
				expect(toolCall, 'Tool accounts_list was not called').toBeDefined()
			} else if (input.includes(`Set my active Cloudflare account to ${MOCK_ACCOUNT_ID}`)) {
				const toolCall = toolCalls.find((call) => call.toolName === 'set_active_account')
				expect(toolCall, 'Tool set_active_account was not called').toBeDefined()

				expect(toolCall?.args, 'Arguments for set_active_account did not match').toEqual(
					expect.objectContaining({ activeAccountIdParam: MOCK_ACCOUNT_ID })
				)
			}

			return promptOutput
		},
		scorers: [checkFactuality],
		threshold: 1,
		timeout: 60000, // 60 seconds
	})
})
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import { expect } from 'vitest'
2+
import { describeEval } from 'vitest-evals'
3+
4+
import { checkFactuality } from '@repo/eval-tools/src/scorers'
5+
import { eachModel } from '@repo/eval-tools/src/test-models'
6+
7+
import { initializeClient, runTask } from './utils' // Assuming utils.ts will exist here
8+
9+
// Evaluates, for every configured model, whether KV-namespace prompts lead the
// model to call the expected MCP tools. Each task's transcript is returned so
// the factuality scorer can compare it against the `expected` description.
eachModel('$modelName', ({ model }) => {
	describeEval('KV Namespaces Tool Evaluations', {
		data: async () => [
			{
				input: 'Create a new Cloudflare KV Namespace called "my-test-namespace".',
				expected: 'The kv_namespaces_create tool should be called to create a new kv namespace.',
			},
			{
				input: 'List all my Cloudflare KV Namespaces.',
				expected:
					'The kv_namespaces_list tool should be called to retrieve the list of kv namespaces. There should be at least one kv namespace in the list.',
			},
			{
				input:
					'Rename my Cloudflare KV Namespace called "my-test-namespace" to "my-new-test-namespace".',
				expected: 'The kv_namespace_update tool should be called to rename the kv namespace.',
			},
			{
				input: 'Get details of my Cloudflare KV Namespace called "my-new-test-namespace".',
				expected:
					'The kv_namespace_get tool should be called to retrieve the details of the kv namespace.',
			},
			{
				input: 'Delete my Cloudflare KV Namespace called "my-new-test-namespace".',
				expected: 'The kv_namespace_delete tool should be called to delete the kv namespace.',
			},
		],
		task: async (input: string) => {
			const client = await initializeClient()
			// Only the transcript and the recorded tool calls are needed here.
			const { promptOutput, toolCalls } = await runTask(client, model, input)

			// Only the "list" prompt has a hard assertion; the other prompts are
			// judged solely by the factuality scorer.
			if (input.includes('List all my Cloudflare KV Namespaces')) {
				const toolCall = toolCalls.find((call) => call.toolName === 'kv_namespaces_list')
				expect(toolCall, 'Tool kv_namespaces_list was not called').toBeDefined()
			}

			return promptOutput
		},
		scorers: [checkFactuality],
		threshold: 1,
		timeout: 60000, // 60 seconds
	})
})
File renamed without changes.
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import { MCPClientManager } from 'agents/mcp/client'
2+
import { streamText, tool, jsonSchema } from 'ai'
3+
import { z } from 'zod'
4+
5+
import type { LanguageModelV1, StreamTextResult, ToolCallPart, ToolSet } from 'ai'
6+
7+
/**
 * Creates an MCP client manager and connects it to the server under test.
 *
 * @param serverUrl SSE endpoint to connect to. Defaults to the local address
 *   the eval scripts wait on before starting vitest.
 * @returns The connected client manager.
 */
export async function initializeClient(
	serverUrl = 'http://localhost:8976/sse'
): Promise<MCPClientManager> {
	const clientManager = new MCPClientManager('test-client', '0.0.0')
	await clientManager.connect(serverUrl)
	return clientManager
}
12+
13+
export async function runTask(
14+
clientManager: MCPClientManager,
15+
model: LanguageModelV1,
16+
input: string
17+
): Promise<{
18+
promptOutput: string
19+
fullResult: StreamTextResult<ToolSet, never>
20+
toolCalls: ToolCallPart[]
21+
}> {
22+
const tools = clientManager.listTools()
23+
const toolSet: ToolSet = tools.reduce((acc, v) => {
24+
if (!v.inputSchema.properties) {
25+
v.inputSchema.properties = {}
26+
}
27+
28+
acc[v.name] = tool({
29+
parameters: jsonSchema(v.inputSchema as any),
30+
description: v.description,
31+
execute: async (args: any, opts) => {
32+
try {
33+
const res = await clientManager.callTool(
34+
{
35+
...v,
36+
arguments: { ...args },
37+
},
38+
z.any() as any,
39+
{ signal: opts.abortSignal }
40+
)
41+
return res.content
42+
} catch (e) {
43+
console.log('Error calling tool')
44+
console.log(e)
45+
return e
46+
}
47+
},
48+
})
49+
return acc
50+
}, {} as ToolSet)
51+
52+
53+
const res = streamText({
54+
model,
55+
system:
56+
"You are an assistant responsible for evaluating the results of calling various tools. Given the user's query, use the tools available to you to answer the question.",
57+
tools: toolSet,
58+
prompt: input,
59+
maxRetries: 1,
60+
maxSteps: 10,
61+
})
62+
63+
for await (const part of res.fullStream) {
64+
}
65+
66+
// convert into an LLM readable result so our factuality checker can validate tool calls
67+
let messagesWithTools = ''
68+
const toolCalls: ToolCallPart[] = []
69+
const response = await res.response
70+
const messages = response.messages
71+
72+
for (const message of messages) {
73+
for (const messagePart of message.content) {
74+
if (typeof messagePart === 'string') {
75+
messagesWithTools += `<message_content type="text">${messagePart}</message_content>`
76+
} else if (messagePart.type === 'tool-call') {
77+
messagesWithTools += `<message_content type=${messagePart.type}>
78+
<tool_name>${messagePart.toolName}</tool_name>
79+
<tool_arguments>${JSON.stringify(messagePart.args)}</tool_arguments>
80+
</message_content>`
81+
toolCalls.push(messagePart)
82+
} else if (messagePart.type === 'text') {
83+
messagesWithTools += `<message_content type=${messagePart.type}>${messagePart.text}</message_content>`
84+
}
85+
}
86+
}
87+
88+
return { promptOutput: messagesWithTools, fullResult: res, toolCalls }
89+
}

apps/workers-bindings/package.json

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
"deploy": "wrangler deploy",
99
"deploy:staging": "wrangler deploy --env staging",
1010
"deploy:production": "wrangler deploy --env production",
11+
"eval:dev": "start-server-and-test --expect 404 eval:server http://localhost:8976 'vitest --testTimeout=60000 --config vitest.config.evals.ts'",
12+
"eval:server": "wrangler dev --var ENVIRONMENT:test",
13+
"eval:ci": "start-server-and-test --expect 404 eval:server http://localhost:8976 'vitest run --testTimeout=60000 --config vitest.config.evals.ts'",
1114
"dev": "wrangler dev",
1215
"start": "wrangler dev",
1316
"types": "wrangler types --include-env=false",
@@ -25,10 +28,15 @@
2528
"@cloudflare/workers-oauth-provider": "0.0.3",
2629
"@modelcontextprotocol/sdk": "1.10.2",
2730
"@n8n/json-schema-to-zod": "1.1.0",
31+
"@repo/eval-tools": "workspace:*",
2832
"@repo/mcp-common": "workspace:*",
2933
"@repo/mcp-observability": "workspace:*",
30-
"agents": "0.0.67",
34+
"agents": "0.0.75",
35+
"ai": "4.3.10",
36+
"concurrently": "9.1.2",
3137
"hono": "4.7.6",
38+
"start-server-and-test": "2.0.11",
39+
"vitest-evals": "0.1.4",
3240
"zod": "3.24.2"
3341
}
3442
}

apps/workers-bindings/src/context.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,5 @@ export interface Env {
1111
MCP_OBJECT: DurableObjectNamespace<WorkersBindingsMCP>
1212
USER_DETAILS: DurableObjectNamespace<UserDetails>
1313
MCP_METRICS: AnalyticsEngineDataset
14+
CLOUDFLARE_API_TOKEN: string
1415
}

0 commit comments

Comments
 (0)