Skip to content

Commit 0960e76

Browse files
committed
feat: add anthropic models to evals
1 parent 361262a commit 0960e76

File tree

5 files changed

+51
-3
lines changed

5 files changed

+51
-3
lines changed

.github/workflows/evals.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ jobs:
2525
echo "OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}" > ./apps/workers-bindings/.dev.vars
2626
echo "DEV_CLOUDFLARE_API_TOKEN=${{ secrets.DEV_CLOUDFLARE_API_TOKEN }}" >> ./apps/sandbox-container/.dev.vars
2727
echo "DEV_CLOUDFLARE_API_TOKEN=${{ secrets.DEV_CLOUDFLARE_API_TOKEN }}" >> ./apps/workers-bindings/.dev.vars
28+
echo "ANTHROPIC_KEY=${{ secrets.ANTHROPIC_KEY }}" >> ./apps/sandbox-container/.dev.vars
29+
echo "ANTHROPIC_KEY=${{ secrets.ANTHROPIC_KEY }}" >> ./apps/workers-bindings/.dev.vars
2830
- name: Verify .dev.vars file
2931
run: |
3032
du -h ./apps/sandbox-container/.dev.vars

apps/workers-bindings/evals/kv_namespaces.eval.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ eachModel('$modelName', ({ model }) => {
6464
task: async (input: string) => {
6565
const client = await initializeClient(/* Pass necessary mocks/config */)
6666
const { promptOutput, toolCalls } = await runTask(client, model, input)
67-
67+
console.log('toolCalls', toolCalls)
6868
const toolCall = toolCalls.find(
6969
(call) => call.toolName === KV_NAMESPACE_TOOLS.kv_namespace_update
7070
)

packages/eval-tools/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"bin": "bin"
1212
},
1313
"dependencies": {
14+
"@ai-sdk/anthropic": "^1.2.11",
1415
"@ai-sdk/openai": "1.3.20",
1516
"@cloudflare/vitest-pool-workers": "0.8.14",
1617
"agents": "0.0.67",

packages/eval-tools/src/test-models.ts

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1+
import { createAnthropic } from '@ai-sdk/anthropic'
2+
import { AnthropicMessagesModelId } from '@ai-sdk/anthropic/internal'
13
import { createOpenAI } from '@ai-sdk/openai'
4+
import { OpenAIChatModelId } from '@ai-sdk/openai/internal'
25
import { env } from 'cloudflare:test'
36
import { describe } from 'vitest'
47
import { createWorkersAI } from 'workers-ai-provider'
@@ -13,7 +16,7 @@ type AiTextGenerationModels = Exclude<
1316
value2key<AiModels, BaseAiTextToImage>
1417
>
1518

16-
function getOpenAiModel(modelName: string) {
19+
function getOpenAiModel(modelName: OpenAIChatModelId) {
1720
if (!env.OPENAI_API_KEY) {
1821
throw new Error('No API token set!')
1922
}
@@ -26,6 +29,19 @@ function getOpenAiModel(modelName: string) {
2629
return { modelName, model, ai }
2730
}
2831

32+
/**
 * Builds the eval entry for a single Anthropic model.
 *
 * Reads the API key from `env.ANTHROPIC_KEY` and creates an Anthropic
 * provider with it, then instantiates the requested model from that provider.
 *
 * @param modelName - Anthropic messages model id to instantiate.
 * @returns An object holding the `modelName`, the instantiated `model`, and
 *   the provider (`ai`) that created it.
 * @throws Error when `env.ANTHROPIC_KEY` is missing or empty.
 */
function getAnthropicModel(modelName: AnthropicMessagesModelId) {
  const apiKey = env.ANTHROPIC_KEY
  // Fail fast: without a key the provider cannot be configured.
  if (!apiKey) {
    throw new Error('No Anthropic key set!')
  }

  const ai = createAnthropic({ apiKey })

  return { modelName, model: ai(modelName), ai }
}
44+
2945
function getWorkersAiModel(modelName: AiTextGenerationModels) {
3046
if (!env.AI) {
3147
throw new Error('No AI binding provided!')
@@ -40,7 +56,8 @@ function getWorkersAiModel(modelName: AiTextGenerationModels) {
4056
export const eachModel = describe.each([
4157
getOpenAiModel('gpt-4o'),
4258
getOpenAiModel('gpt-4o-mini'),
43-
59+
getAnthropicModel('claude-3-5-sonnet-latest'),
60+
getAnthropicModel('claude-3-7-sonnet-latest'),
4461
// llama 3 is somewhat inconsistent
4562
//getWorkersAiModel("@cf/meta/llama-3.3-70b-instruct-fp8-fast")
4663
// Currently llama 4 is having issues with tool calling

pnpm-lock.yaml

Lines changed: 28 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)