Skip to content

Commit dd41904

Browse files
committed
feat: add configurable timeout for evals (5-10 min)
- Add timeout field to CreateRun schema with min 5, max 10, default 5
- Add timeout slider UI component to /runs/new page
- Update database schema to include timeout column in runs table
- Create migration to add timeout column with default value of 5
- Update runTask.ts to use configurable timeout from run settings
- Pass timeout parameter through the createRun action
1 parent 670d79b commit dd41904

File tree

6 files changed

+38
-2
lines changed

6 files changed

+38
-2
lines changed

apps/web-evals/src/actions/runs.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,10 @@ import { CreateRun } from "@/lib/schemas"
2222
const EVALS_REPO_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals")
2323

2424
// eslint-disable-next-line @typescript-eslint/no-unused-vars
25-
export async function createRun({ suite, exercises = [], systemPrompt, ...values }: CreateRun) {
25+
export async function createRun({ suite, exercises = [], systemPrompt, timeout, ...values }: CreateRun) {
2626
const run = await _createRun({
2727
...values,
28+
timeout,
2829
socketPath: "", // TODO: Get rid of this.
2930
})
3031

apps/web-evals/src/app/runs/new/new-run.tsx

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ import {
2121
CONCURRENCY_MIN,
2222
CONCURRENCY_MAX,
2323
CONCURRENCY_DEFAULT,
24+
TIMEOUT_MIN,
25+
TIMEOUT_MAX,
26+
TIMEOUT_DEFAULT,
2427
} from "@/lib/schemas"
2528
import { cn } from "@/lib/utils"
2629
import { useOpenRouterModels } from "@/hooks/use-open-router-models"
@@ -77,6 +80,7 @@ export function NewRun() {
7780
exercises: [],
7881
settings: undefined,
7982
concurrency: CONCURRENCY_DEFAULT,
83+
timeout: TIMEOUT_DEFAULT,
8084
},
8185
})
8286

@@ -341,6 +345,29 @@ export function NewRun() {
341345
)}
342346
/>
343347

348+
<FormField
349+
control={form.control}
350+
name="timeout"
351+
render={({ field }) => (
352+
<FormItem>
353+
<FormLabel>Timeout (minutes)</FormLabel>
354+
<FormControl>
355+
<div className="flex flex-row items-center gap-2">
356+
<Slider
357+
defaultValue={[field.value]}
358+
min={TIMEOUT_MIN}
359+
max={TIMEOUT_MAX}
360+
step={1}
361+
onValueChange={(value) => field.onChange(value[0])}
362+
/>
363+
<div>{field.value} min</div>
364+
</div>
365+
</FormControl>
366+
<FormMessage />
367+
</FormItem>
368+
)}
369+
/>
370+
344371
<FormField
345372
control={form.control}
346373
name="description"

apps/web-evals/src/lib/schemas.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ export const CONCURRENCY_MIN = 1
1212
export const CONCURRENCY_MAX = 25
1313
export const CONCURRENCY_DEFAULT = 1
1414

15+
export const TIMEOUT_MIN = 5
16+
export const TIMEOUT_MAX = 10
17+
export const TIMEOUT_DEFAULT = 5
18+
1519
export const createRunSchema = z
1620
.object({
1721
model: z.string().min(1, { message: "Model is required." }),
@@ -20,6 +24,7 @@ export const createRunSchema = z
2024
exercises: z.array(z.string()).optional(),
2125
settings: rooCodeSettingsSchema.optional(),
2226
concurrency: z.number().int().min(CONCURRENCY_MIN).max(CONCURRENCY_MAX),
27+
timeout: z.number().int().min(TIMEOUT_MIN).max(TIMEOUT_MAX),
2328
systemPrompt: z.string().optional(),
2429
})
2530
.refine((data) => data.suite === "full" || (data.exercises || []).length > 0, {

packages/evals/src/cli/runTask.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,9 +304,10 @@ export const runTask = async ({ run, task, publish, logger }: RunTaskOptions) =>
304304
})
305305

306306
try {
307+
const timeoutMs = (run.timeout || 5) * 60 * 1_000 // Convert minutes to milliseconds
307308
await pWaitFor(() => !!taskFinishedAt || !!taskAbortedAt || isClientDisconnected, {
308309
interval: 1_000,
309-
timeout: EVALS_TIMEOUT,
310+
timeout: timeoutMs,
310311
})
311312
} catch (_error) {
312313
taskTimedOut = true
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ALTER TABLE "runs" ADD COLUMN "timeout" integer DEFAULT 5 NOT NULL;

packages/evals/src/db/schema.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ export const runs = pgTable("runs", {
1818
pid: integer(),
1919
socketPath: text("socket_path").notNull(),
2020
concurrency: integer().default(2).notNull(),
21+
timeout: integer().default(5).notNull(),
2122
passed: integer().default(0).notNull(),
2223
failed: integer().default(0).notNull(),
2324
createdAt: timestamp("created_at").notNull(),

0 commit comments

Comments
 (0)