Skip to content

Commit 5ec1252

Browse files
authored
Merge branch 'main' into mhart/add-docs-prompt
2 parents aed68b2 + a20708d commit 5ec1252

File tree

13 files changed

+226
-31
lines changed

13 files changed

+226
-31
lines changed

.github/workflows/evals.yml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,12 @@ jobs:
2121
echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" > ./apps/workers-bindings/.dev.vars
2222
echo "DEV_CLOUDFLARE_API_TOKEN=${{ secrets.DEV_CLOUDFLARE_API_TOKEN }}" >> ./apps/sandbox-container/.dev.vars
2323
echo "DEV_CLOUDFLARE_API_TOKEN=${{ secrets.DEV_CLOUDFLARE_API_TOKEN }}" >> ./apps/workers-bindings/.dev.vars
24+
echo "AI_GATEWAY_TOKEN=${{ secrets.AI_GATEWAY_TOKEN }}" >> ./apps/sandbox-container/.dev.vars
25+
echo "AI_GATEWAY_TOKEN=${{ secrets.AI_GATEWAY_TOKEN }}" >> ./apps/workers-bindings/.dev.vars
26+
echo "CLOUDFLARE_ACCOUNT_ID=${{ secrets.CLOUDFLARE_ACCOUNT_ID }}" >> ./apps/sandbox-container/.dev.vars
27+
echo "CLOUDFLARE_ACCOUNT_ID=${{ secrets.CLOUDFLARE_ACCOUNT_ID }}" >> ./apps/workers-bindings/.dev.vars
28+
echo "AI_GATEWAY_ID=${{ secrets.AI_GATEWAY_ID }}" >> ./apps/sandbox-container/.dev.vars
29+
echo "AI_GATEWAY_ID=${{ secrets.AI_GATEWAY_ID }}" >> ./apps/workers-bindings/.dev.vars
2430
- name: Verify .dev.vars file
2531
run: |
2632
du -h ./apps/sandbox-container/.dev.vars

.vscode/launch.json

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,32 @@
1010
"attachExistingChildren": false,
1111
"autoAttachChildProcesses": false,
1212
"sourceMaps": true // works with or without this line
13+
},
14+
{
15+
"type": "node",
16+
"request": "launch",
17+
"name": "Open inspector with Vitest",
18+
"runtimeExecutable": "npm",
19+
"runtimeArgs": ["run", "eval:dev"],
20+
"console": "integratedTerminal",
21+
"cwd": "${workspaceFolder}/apps/workers-bindings"
22+
},
23+
{
24+
"name": "Attach to Workers Runtime",
25+
"type": "node",
26+
"request": "attach",
27+
"port": 9229,
28+
"cwd": "/",
29+
"resolveSourceMapLocations": null,
30+
"attachExistingChildren": false,
31+
"autoAttachChildProcesses": false
32+
}
33+
],
34+
"compounds": [
35+
{
36+
"name": "Debug Workers tests",
37+
"configurations": ["Open inspector with Vitest", "Attach to Workers Runtime"],
38+
"stopAll": true
1339
}
1440
]
1541
}

apps/autorag/src/tools/autorag.tools.ts

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import Cloudflare from 'cloudflare'
1+
import { V4PagePaginationArray } from 'cloudflare/src/pagination.js'
22
import { z } from 'zod'
33

44
import { getCloudflareClient } from '@repo/mcp-common/src/cloudflare-api'
@@ -7,8 +7,6 @@ import { pageParam, perPageParam } from '../types'
77

88
import type { AutoRAGMCP } from '../autorag.app'
99

10-
import V4PagePaginationArray = Cloudflare.V4PagePaginationArray
11-
1210
export function registerAutoRAGTools(agent: AutoRAGMCP) {
1311
agent.server.tool(
1412
'list_rags',
@@ -33,6 +31,7 @@ export function registerAutoRAGTools(agent: AutoRAGMCP) {
3331
const client = getCloudflareClient(agent.props.accessToken)
3432
const r = (await client.getAPIList(
3533
`/accounts/${accountId}/autorag/rags`,
34+
// @ts-ignore
3635
V4PagePaginationArray,
3736
{ query: { page: params.page, per_page: params.per_page } }
3837
)) as unknown as {

apps/sandbox-container/server/sandbox.server.context.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,9 @@ export interface Env {
88
MCP_SERVER_NAME: string
99
MCP_SERVER_VERSION: string
1010
OPENAI_API_KEY: string
11+
AI_GATEWAY_TOKEN: string
12+
CLOUDFLARE_ACCOUNT_ID: string
13+
AI_GATEWAY_ID: string
1114
MCP_OBJECT: DurableObjectNamespace<ContainerMcpAgent>
1215
CONTAINER_MANAGER: DurableObjectNamespace<ContainerManager>
1316
USER_CONTAINER: DurableObjectNamespace<UserContainer>

apps/sandbox-container/types.d.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,9 @@
11
declare module 'cloudflare:test' {
22
interface ProvidedEnv {
33
OPENAI_API_KEY: 'TODO'
4+
AI_GATEWAY_TOKEN: string
5+
CLOUDFLARE_ACCOUNT_ID: string
6+
AI_GATEWAY_ID: string
47
AI: Ai
58
}
69
}

apps/workers-bindings/evals/kv_namespaces.eval.ts

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ eachModel('$modelName', ({ model }) => {
1919
task: async (input: string) => {
2020
const client = await initializeClient(/* Pass necessary mocks/config */)
2121
const { promptOutput, toolCalls } = await runTask(client, model, input)
22-
2322
const toolCall = toolCalls.find(
2423
(call) => call.toolName === KV_NAMESPACE_TOOLS.kv_namespace_create
2524
)
@@ -41,7 +40,6 @@ eachModel('$modelName', ({ model }) => {
4140
task: async (input: string) => {
4241
const client = await initializeClient(/* Pass necessary mocks/config */)
4342
const { promptOutput, toolCalls } = await runTask(client, model, input)
44-
4543
const toolCall = toolCalls.find(
4644
(call) => call.toolName === KV_NAMESPACE_TOOLS.kv_namespaces_list
4745
)
@@ -56,15 +54,13 @@ eachModel('$modelName', ({ model }) => {
5654
describeEval('Rename Cloudflare KV Namespace', {
5755
data: async () => [
5856
{
59-
input:
60-
'Rename my Cloudflare KV Namespace called "my-test-namespace" to "my-new-test-namespace".',
57+
input: 'Rename my Cloudflare KV Namespace with ID 1234 to "my-new-test-namespace".',
6158
expected: `The ${KV_NAMESPACE_TOOLS.kv_namespace_update} tool should be called to rename the kv namespace.`,
6259
},
6360
],
6461
task: async (input: string) => {
6562
const client = await initializeClient(/* Pass necessary mocks/config */)
6663
const { promptOutput, toolCalls } = await runTask(client, model, input)
67-
6864
const toolCall = toolCalls.find(
6965
(call) => call.toolName === KV_NAMESPACE_TOOLS.kv_namespace_update
7066
)
@@ -79,14 +75,13 @@ eachModel('$modelName', ({ model }) => {
7975
describeEval('Get Cloudflare KV Namespace Details', {
8076
data: async () => [
8177
{
82-
input: 'Get details of my Cloudflare KV Namespace called "my-new-test-namespace".',
78+
input: 'Get details of my Cloudflare KV Namespace with ID 1234.',
8379
expected: `The ${KV_NAMESPACE_TOOLS.kv_namespace_get} tool should be called to retrieve the details of the kv namespace.`,
8480
},
8581
],
8682
task: async (input: string) => {
8783
const client = await initializeClient(/* Pass necessary mocks/config */)
8884
const { promptOutput, toolCalls } = await runTask(client, model, input)
89-
9085
const toolCall = toolCalls.find(
9186
(call) => call.toolName === KV_NAMESPACE_TOOLS.kv_namespace_get
9287
)
@@ -101,14 +96,13 @@ eachModel('$modelName', ({ model }) => {
10196
describeEval('Delete Cloudflare KV Namespace', {
10297
data: async () => [
10398
{
104-
input: 'Look up the id of my only KV namespace and delete it.',
99+
input: 'Delete the kv namespace with ID 1234.',
105100
expected: `The ${KV_NAMESPACE_TOOLS.kv_namespace_delete} tool should be called to delete the kv namespace.`,
106101
},
107102
],
108103
task: async (input: string) => {
109104
const client = await initializeClient(/* Pass necessary mocks/config */)
110105
const { promptOutput, toolCalls } = await runTask(client, model, input)
111-
112106
const toolCall = toolCalls.find(
113107
(call) => call.toolName === KV_NAMESPACE_TOOLS.kv_namespace_delete
114108
)

apps/workers-bindings/src/bindings.context.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,5 +16,8 @@ export interface Env {
1616
DEV_CLOUDFLARE_EMAIL: string
1717
CLOUDFLARE_API_TOKEN: string
1818
OPENAI_API_KEY: string
19+
AI_GATEWAY_TOKEN: string
20+
CLOUDFLARE_ACCOUNT_ID: string
21+
AI_GATEWAY_ID: string
1922
AI: Ai
2023
}

package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
"fix:deps": "run-fix-deps",
2121
"test:watch": "vitest",
2222
"eval:ci": "run-turbo eval:ci",
23+
"eval:dev": "run-turbo eval:dev",
2324
"update-deps": "syncpack update"
2425
},
2526
"devDependencies": {

packages/eval-tools/package.json

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,10 +11,13 @@
1111
"bin": "bin"
1212
},
1313
"dependencies": {
14+
"@ai-sdk/anthropic": "1.2.11",
15+
"@ai-sdk/google": "1.2.17",
1416
"@ai-sdk/openai": "1.3.20",
1517
"@cloudflare/vitest-pool-workers": "0.8.14",
1618
"agents": "0.0.67",
1719
"ai": "4.3.10",
20+
"ai-gateway-provider": "0.0.6",
1821
"workers-ai-provider": "0.3.0",
1922
"wrangler": "4.10.0",
2023
"zod": "3.24.2"

packages/eval-tools/src/runTask.ts

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,16 @@
11
import { type MCPClientManager } from 'agents/mcp/client'
2-
import { jsonSchema, streamText, tool } from 'ai'
2+
import { generateText, jsonSchema, tool } from 'ai'
33
import { z } from 'zod'
44

5-
import type { LanguageModelV1, StreamTextResult, ToolCallPart, ToolSet } from 'ai'
5+
import type { GenerateTextResult, LanguageModelV1, ToolCallPart, ToolSet } from 'ai'
66

77
export async function runTask(
88
clientManager: MCPClientManager,
99
model: LanguageModelV1,
1010
input: string
1111
): Promise<{
1212
promptOutput: string
13-
fullResult: StreamTextResult<ToolSet, never>
13+
fullResult: GenerateTextResult<ToolSet, never>
1414
toolCalls: ToolCallPart[]
1515
}> {
1616
const tools = clientManager.listTools()
@@ -43,7 +43,7 @@ export async function runTask(
4343
return acc
4444
}, {} as ToolSet)
4545

46-
const res = streamText({
46+
const res = await generateText({
4747
model,
4848
system:
4949
"You are an assistant responsible for evaluating the results of calling various tools. Given the user's query, use the tools available to you to answer the question.",
@@ -53,15 +53,10 @@ export async function runTask(
5353
maxSteps: 10,
5454
})
5555

56-
// we need to consume the full stream, so this is empty
57-
// eslint-disable-next-line no-empty
58-
for await (const _ of res.fullStream) {
59-
}
60-
6156
// convert into an LLM readable result so our factuality checker can validate tool calls
6257
let messagesWithTools = ''
6358
const toolCalls: ToolCallPart[] = []
64-
const response = await res.response
59+
const response = res.response
6560
const messages = response.messages
6661

6762
for (const message of messages) {

0 commit comments

Comments
 (0)