Skip to content

Commit c96b399

Browse files
roomote[bot], roomote, hannesrudolph
authored
feat: add configurable timeout for evals (5-10 min) (RooCodeInc#5865)
* feat: add configurable timeout for evals (5-10 min)
  - Add timeout field to CreateRun schema with min 5, max 10, default 5
  - Add timeout slider UI component to /runs/new page
  - Update database schema to include timeout column in runs table
  - Create migration to add timeout column with default value of 5
  - Update runTask.ts to use configurable timeout from run settings
  - Pass timeout parameter through the createRun action

* fix: remove unused EVALS_TIMEOUT import

* fix: add timeout field to createRun calls in copyRun test
  - Added timeout: 5 to both createRun calls in copyRun.spec.ts
  - This fixes the test failure caused by the new required timeout field in the runs schema
  - The timeout field was added in the configurable timeout feature but the test was not updated

* fix: use configurable timeout for Redis key expiration in registerRunner
  - Updated registerRunner function to accept timeoutSeconds parameter
  - Modified call in runTask.ts to pass configurable timeout instead of hardcoded EVALS_TIMEOUT
  - Removed unused EVALS_TIMEOUT import from redis.ts
  - Ensures Redis keys remain valid for the entire duration of task execution (up to 10 minutes)

---------

Co-authored-by: Roo Code <[email protected]>
Co-authored-by: hannesrudolph <[email protected]>
1 parent 1b12108 commit c96b399

File tree

8 files changed

+52
-16
lines changed

8 files changed

+52
-16
lines changed

apps/web-evals/src/actions/runs.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,10 @@ import { CreateRun } from "@/lib/schemas"
2222
const EVALS_REPO_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "../../../../../evals")
2323

2424
// eslint-disable-next-line @typescript-eslint/no-unused-vars
25-
export async function createRun({ suite, exercises = [], systemPrompt, ...values }: CreateRun) {
25+
export async function createRun({ suite, exercises = [], systemPrompt, timeout, ...values }: CreateRun) {
2626
const run = await _createRun({
2727
...values,
28+
timeout,
2829
socketPath: "", // TODO: Get rid of this.
2930
})
3031

apps/web-evals/src/app/runs/new/new-run.tsx

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ import {
2121
CONCURRENCY_MIN,
2222
CONCURRENCY_MAX,
2323
CONCURRENCY_DEFAULT,
24+
TIMEOUT_MIN,
25+
TIMEOUT_MAX,
26+
TIMEOUT_DEFAULT,
2427
} from "@/lib/schemas"
2528
import { cn } from "@/lib/utils"
2629
import { useOpenRouterModels } from "@/hooks/use-open-router-models"
@@ -77,6 +80,7 @@ export function NewRun() {
7780
exercises: [],
7881
settings: undefined,
7982
concurrency: CONCURRENCY_DEFAULT,
83+
timeout: TIMEOUT_DEFAULT,
8084
},
8185
})
8286

@@ -341,6 +345,29 @@ export function NewRun() {
341345
)}
342346
/>
343347

348+
<FormField
349+
control={form.control}
350+
name="timeout"
351+
render={({ field }) => (
352+
<FormItem>
353+
<FormLabel>Timeout (minutes)</FormLabel>
354+
<FormControl>
355+
<div className="flex flex-row items-center gap-2">
356+
<Slider
357+
defaultValue={[field.value]}
358+
min={TIMEOUT_MIN}
359+
max={TIMEOUT_MAX}
360+
step={1}
361+
onValueChange={(value) => field.onChange(value[0])}
362+
/>
363+
<div>{field.value} min</div>
364+
</div>
365+
</FormControl>
366+
<FormMessage />
367+
</FormItem>
368+
)}
369+
/>
370+
344371
<FormField
345372
control={form.control}
346373
name="description"

apps/web-evals/src/lib/schemas.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ export const CONCURRENCY_MIN = 1
1212
export const CONCURRENCY_MAX = 25
1313
export const CONCURRENCY_DEFAULT = 1
1414

15+
export const TIMEOUT_MIN = 5
16+
export const TIMEOUT_MAX = 10
17+
export const TIMEOUT_DEFAULT = 5
18+
1519
export const createRunSchema = z
1620
.object({
1721
model: z.string().min(1, { message: "Model is required." }),
@@ -20,6 +24,7 @@ export const createRunSchema = z
2024
exercises: z.array(z.string()).optional(),
2125
settings: rooCodeSettingsSchema.optional(),
2226
concurrency: z.number().int().min(CONCURRENCY_MIN).max(CONCURRENCY_MAX),
27+
timeout: z.number().int().min(TIMEOUT_MIN).max(TIMEOUT_MAX),
2328
systemPrompt: z.string().optional(),
2429
})
2530
.refine((data) => data.suite === "full" || (data.exercises || []).length > 0, {

packages/evals/src/cli/redis.ts

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
import { createClient, type RedisClientType } from "redis"
22

3-
import { EVALS_TIMEOUT } from "@roo-code/types"
4-
53
let redis: RedisClientType | undefined
64

75
export const redisClient = async () => {
@@ -18,11 +16,19 @@ export const getPubSubKey = (runId: number) => `evals:${runId}`
1816
export const getRunnersKey = (runId: number) => `runners:${runId}`
1917
export const getHeartbeatKey = (runId: number) => `heartbeat:${runId}`
2018

21-
export const registerRunner = async ({ runId, taskId }: { runId: number; taskId: number }) => {
19+
export const registerRunner = async ({
20+
runId,
21+
taskId,
22+
timeoutSeconds,
23+
}: {
24+
runId: number
25+
taskId: number
26+
timeoutSeconds: number
27+
}) => {
2228
const redis = await redisClient()
2329
const runnersKey = getRunnersKey(runId)
2430
await redis.sAdd(runnersKey, `task-${taskId}:${process.env.HOSTNAME ?? process.pid}`)
25-
await redis.expire(runnersKey, EVALS_TIMEOUT / 1_000)
31+
await redis.expire(runnersKey, timeoutSeconds)
2632
}
2733

2834
export const deregisterRunner = async ({ runId, taskId }: { runId: number; taskId: number }) => {

packages/evals/src/cli/runTask.ts

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,7 @@ import * as os from "node:os"
55
import pWaitFor from "p-wait-for"
66
import { execa } from "execa"
77

8-
import {
9-
type TaskEvent,
10-
TaskCommandName,
11-
RooCodeEventName,
12-
IpcMessageType,
13-
EVALS_SETTINGS,
14-
EVALS_TIMEOUT,
15-
} from "@roo-code/types"
8+
import { type TaskEvent, TaskCommandName, RooCodeEventName, IpcMessageType, EVALS_SETTINGS } from "@roo-code/types"
169
import { IpcClient } from "@roo-code/ipc"
1710

1811
import {
@@ -42,7 +35,7 @@ export const processTask = async ({ taskId, logger }: { taskId: number; logger?:
4235
const task = await findTask(taskId)
4336
const { language, exercise } = task
4437
const run = await findRun(task.runId)
45-
await registerRunner({ runId: run.id, taskId })
38+
await registerRunner({ runId: run.id, taskId, timeoutSeconds: (run.timeout || 5) * 60 })
4639

4740
const containerized = isDockerContainer()
4841

@@ -304,9 +297,10 @@ export const runTask = async ({ run, task, publish, logger }: RunTaskOptions) =>
304297
})
305298

306299
try {
300+
const timeoutMs = (run.timeout || 5) * 60 * 1_000 // Convert minutes to milliseconds
307301
await pWaitFor(() => !!taskFinishedAt || !!taskAbortedAt || isClientDisconnected, {
308302
interval: 1_000,
309-
timeout: EVALS_TIMEOUT,
303+
timeout: timeoutMs,
310304
})
311305
} catch (_error) {
312306
taskTimedOut = true
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ALTER TABLE "runs" ADD COLUMN "timeout" integer DEFAULT 5 NOT NULL;

packages/evals/src/db/queries/__tests__/copyRun.spec.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ describe("copyRun", () => {
2323
socketPath: "/tmp/roo.sock",
2424
description: "Test run for copying",
2525
concurrency: 4,
26+
timeout: 5,
2627
})
2728

2829
sourceRunId = run.id
@@ -271,7 +272,7 @@ describe("copyRun", () => {
271272
})
272273

273274
it("should copy run without task metrics", async () => {
274-
const minimalRun = await createRun({ model: "gpt-3.5-turbo", socketPath: "/tmp/minimal.sock" })
275+
const minimalRun = await createRun({ model: "gpt-3.5-turbo", socketPath: "/tmp/minimal.sock", timeout: 5 })
275276

276277
const newRunId = await copyRun({ sourceDb: db, targetDb: db, runId: minimalRun.id })
277278

packages/evals/src/db/schema.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ export const runs = pgTable("runs", {
1818
pid: integer(),
1919
socketPath: text("socket_path").notNull(),
2020
concurrency: integer().default(2).notNull(),
21+
timeout: integer().default(5).notNull(),
2122
passed: integer().default(0).notNull(),
2223
failed: integer().default(0).notNull(),
2324
createdAt: timestamp("created_at").notNull(),

0 commit comments

Comments
 (0)