Skip to content

Commit 1d305ab

Browse files
committed
More powerful runner
1 parent 8453c4a commit 1d305ab

File tree

3 files changed

+10
-5
lines changed

3 files changed

+10
-5
lines changed

.github/workflows/evals.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ jobs:
1313
evals:
1414
# Run if triggered manually or if PR has 'evals' label.
1515
if: github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'evals')
16-
runs-on: ubuntu-latest
16+
runs-on: blacksmith-16vcpu-ubuntu-2404
1717
timeout-minutes: 30
1818

1919
steps:

packages/evals/src/cli/index.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ const main = async () => {
2424

2525
try {
2626
if (ci) {
27-
await runCi()
27+
await runCi({ concurrency: 3, exercisesPerLanguage: 5 })
2828
} else if (runId !== -1) {
2929
await runEvals(runId)
3030
} else if (taskId !== -1) {

packages/evals/src/cli/runCi.ts

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ import { runEvals } from "./runEvals.js"
77

88
export const runCi = async ({
99
concurrency = 1,
10-
exercisesPerLanguage = 1,
10+
exercisesPerLanguage,
1111
}: {
1212
concurrency?: number
1313
exercisesPerLanguage?: number
@@ -17,8 +17,13 @@ export const runCi = async ({
1717
const run = await createRun({ model: "anthropic/claude-sonnet-4", socketPath: "", concurrency })
1818

1919
for (const language of exerciseLanguages) {
20-
const exercises = (await getExercisesForLanguage(EVALS_REPO_PATH, language)).slice(0, exercisesPerLanguage)
21-
await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }))
20+
let exercises = await getExercisesForLanguage(EVALS_REPO_PATH, language)
21+
22+
if (exercisesPerLanguage) {
23+
exercises = exercises.slice(0, exercisesPerLanguage)
24+
}
25+
26+
await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), { concurrency })
2227
}
2328

2429
await runEvals(run.id)

0 commit comments

Comments
 (0)