Skip to content

Commit 6ab9aa9

Browse files
authored
Control evals concurrency in web app (#2265)
1 parent 1f6da88 commit 6ab9aa9

File tree

14 files changed

+943
-64
lines changed

14 files changed

+943
-64
lines changed

evals/apps/cli/src/index.ts

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ import { getExercises } from "./exercises.js"
3636
type TaskResult = { success: boolean; retry: boolean }
3737
type TaskPromise = Promise<TaskResult>
3838

39-
const MAX_CONCURRENCY = 5
4039
const TASK_TIMEOUT = 10 * 60 * 1_000
4140
const UNIT_TEST_TIMEOUT = 60 * 1_000
4241

@@ -78,12 +77,14 @@ const run = async (toolbox: GluegunToolbox) => {
7877
const exercises = getExercises()[language as ExerciseLanguage]
7978

8079
await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), {
81-
concurrency: 10,
80+
concurrency: run.concurrency,
8281
})
8382
}
8483
} else if (exercise === "all") {
8584
const exercises = getExercises()[language as ExerciseLanguage]
86-
await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), { concurrency: 10 })
85+
await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), {
86+
concurrency: run.concurrency,
87+
})
8788
} else {
8889
language = language || (await askLanguage(prompt))
8990
exercise = exercise || (await askExercise(prompt, language))
@@ -145,13 +146,14 @@ const run = async (toolbox: GluegunToolbox) => {
145146
}
146147

147148
let delay = 0
149+
148150
for (const task of tasks) {
149151
const promise = processTask(task, delay)
150152
delay = delay + 5_000
151153
runningPromises.push(promise)
152154
promise.then(() => processTaskResult(task, promise))
153155

154-
if (runningPromises.length >= MAX_CONCURRENCY) {
156+
if (runningPromises.length >= run.concurrency) {
155157
delay = 0
156158
await Promise.race(runningPromises)
157159
}
@@ -179,7 +181,7 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
179181
// subprocess.stdout.pipe(process.stdout)
180182

181183
// Sleep for a random amount of time before opening a new VSCode window.
182-
await new Promise((resolve) => setTimeout(resolve, 1_000 + Math.random() * MAX_CONCURRENCY * 1_000))
184+
await new Promise((resolve) => setTimeout(resolve, 1_000 + Math.random() * 5_000))
183185
console.log(`Opening new VS Code window at ${workspacePath}`)
184186

185187
await execa({

evals/apps/web/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
"@radix-ui/react-scroll-area": "^1.2.3",
2121
"@radix-ui/react-select": "^2.1.6",
2222
"@radix-ui/react-separator": "^1.1.2",
23+
"@radix-ui/react-slider": "^1.2.4",
2324
"@radix-ui/react-slot": "^1.1.2",
2425
"@radix-ui/react-tabs": "^1.1.3",
2526
"@radix-ui/react-tooltip": "^1.1.8",

evals/apps/web/src/app/runs/new/new-run.tsx

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,13 @@ import { X, Rocket, Check, ChevronsUpDown, HardDriveUpload, CircleCheck } from "
1212
import { globalSettingsSchema, providerSettingsSchema, rooCodeDefaults } from "@evals/types"
1313

1414
import { createRun } from "@/lib/server/runs"
15-
import { createRunSchema as formSchema, type CreateRun as FormValues } from "@/lib/schemas"
15+
import {
16+
createRunSchema as formSchema,
17+
type CreateRun as FormValues,
18+
CONCURRENCY_MIN,
19+
CONCURRENCY_MAX,
20+
CONCURRENCY_DEFAULT,
21+
} from "@/lib/schemas"
1622
import { cn } from "@/lib/utils"
1723
import { useOpenRouterModels } from "@/hooks/use-open-router-models"
1824
import { useExercises } from "@/hooks/use-exercises"
@@ -38,6 +44,7 @@ import {
3844
PopoverContent,
3945
PopoverTrigger,
4046
ScrollArea,
47+
Slider,
4148
} from "@/components/ui"
4249

4350
import { SettingsDiff } from "./settings-diff"
@@ -63,6 +70,7 @@ export function NewRun() {
6370
suite: "full",
6471
exercises: [],
6572
settings: undefined,
73+
concurrency: CONCURRENCY_DEFAULT,
6674
},
6775
})
6876

@@ -73,7 +81,7 @@ export function NewRun() {
7381
formState: { isSubmitting },
7482
} = form
7583

76-
const [model, suite, settings] = watch(["model", "suite", "settings"])
84+
const [model, suite, settings] = watch(["model", "suite", "settings", "concurrency"])
7785

7886
const onSubmit = useCallback(
7987
async (values: FormValues) => {
@@ -288,6 +296,29 @@ export function NewRun() {
288296
)}
289297
/>
290298

299+
<FormField
300+
control={form.control}
301+
name="concurrency"
302+
render={({ field }) => (
303+
<FormItem>
304+
<FormLabel>Concurrency</FormLabel>
305+
<FormControl>
306+
<div className="flex flex-row items-center gap-2">
307+
<Slider
308+
defaultValue={[field.value]}
309+
min={CONCURRENCY_MIN}
310+
max={CONCURRENCY_MAX}
311+
step={1}
312+
onValueChange={(value) => field.onChange(value[0])}
313+
/>
314+
<div>{field.value}</div>
315+
</div>
316+
</FormControl>
317+
<FormMessage />
318+
</FormItem>
319+
)}
320+
/>
321+
291322
<FormField
292323
control={form.control}
293324
name="description"

evals/apps/web/src/components/ui/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ export * from "./popover"
1111
export * from "./scroll-area"
1212
export * from "./select"
1313
export * from "./separator"
14+
export * from "./slider"
1415
export * from "./sonner"
1516
export * from "./table"
1617
export * from "./tabs"
evals/apps/web/src/components/ui/slider.tsx

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
"use client"
2+
3+
import * as React from "react"
4+
import * as SliderPrimitive from "@radix-ui/react-slider"
5+
6+
import { cn } from "@/lib/utils"
7+
8+
/**
 * Styled wrapper around the Radix `Slider` primitive.
 *
 * Accepts every prop of `SliderPrimitive.Root`. `className` is merged with the
 * default classes via `cn`, and all remaining props are forwarded to the root.
 * One thumb is rendered per entry of `value` (controlled usage) or
 * `defaultValue` (uncontrolled usage).
 *
 * @param className - Extra classes appended to the root element's defaults.
 * @param defaultValue - Initial thumb values for uncontrolled usage.
 * @param value - Current thumb values for controlled usage.
 * @param min - Lower bound of the slider range (defaults to 0).
 * @param max - Upper bound of the slider range (defaults to 100).
 */
function Slider({
  className,
  defaultValue,
  value,
  min = 0,
  max = 100,
  ...props
}: React.ComponentProps<typeof SliderPrimitive.Root>) {
  // Decide how many thumbs to render. When neither `value` nor `defaultValue`
  // is supplied, the Radix root tracks a single value starting at `min`, so we
  // render exactly one thumb. Falling back to `[min, max]` would add a second
  // thumb that has no backing value and cannot be moved.
  const thumbValues = React.useMemo(
    () => (Array.isArray(value) ? value : Array.isArray(defaultValue) ? defaultValue : [min]),
    [value, defaultValue, min],
  )

  return (
    <SliderPrimitive.Root
      data-slot="slider"
      defaultValue={defaultValue}
      value={value}
      min={min}
      max={max}
      className={cn(
        "relative flex w-full touch-none items-center select-none data-[disabled]:opacity-50 data-[orientation=vertical]:h-full data-[orientation=vertical]:min-h-44 data-[orientation=vertical]:w-auto data-[orientation=vertical]:flex-col",
        className,
      )}
      {...props}>
      <SliderPrimitive.Track
        data-slot="slider-track"
        className={cn(
          "bg-muted relative grow overflow-hidden rounded-full data-[orientation=horizontal]:h-1.5 data-[orientation=horizontal]:w-full data-[orientation=vertical]:h-full data-[orientation=vertical]:w-1.5",
        )}>
        <SliderPrimitive.Range
          data-slot="slider-range"
          className={cn(
            "bg-primary absolute data-[orientation=horizontal]:h-full data-[orientation=vertical]:w-full",
          )}
        />
      </SliderPrimitive.Track>
      {/* One thumb per tracked value; the index is a stable key because thumbs are never reordered. */}
      {thumbValues.map((_, index) => (
        <SliderPrimitive.Thumb
          data-slot="slider-thumb"
          key={index}
          className="border-primary bg-accent block size-4 shrink-0 rounded-full border shadow-sm transition-[color,box-shadow] focus-visible:outline-hidden disabled:pointer-events-none disabled:opacity-50 cursor-pointer"
        />
      ))}
    </SliderPrimitive.Root>
  )
}
55+
56+
export { Slider }

evals/apps/web/src/lib/schemas.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,18 @@ import { rooCodeSettingsSchema } from "@evals/types"
66
* CreateRun
77
*/
88

9+
// Smallest `concurrency` value accepted by `createRunSchema`; also the slider's lower bound in the new-run form.
export const CONCURRENCY_MIN = 1
10+
// Largest `concurrency` value accepted by `createRunSchema`; also the slider's upper bound in the new-run form.
export const CONCURRENCY_MAX = 25
11+
// Value `createRunSchema` falls back to when `concurrency` is omitted; also the new-run form's initial value.
export const CONCURRENCY_DEFAULT = 2
12+
913
export const createRunSchema = z
1014
.object({
1115
model: z.string().min(1, { message: "Model is required." }),
1216
description: z.string().optional(),
1317
suite: z.enum(["full", "partial"]),
1418
exercises: z.array(z.string()).optional(),
1519
settings: rooCodeSettingsSchema.optional(),
20+
concurrency: z.number().int().min(CONCURRENCY_MIN).max(CONCURRENCY_MAX).default(CONCURRENCY_DEFAULT),
1621
})
1722
.refine((data) => data.suite === "full" || (data.exercises || []).length > 0, {
1823
message: "Exercises are required when running a partial suite.",

evals/package.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@
1313
"drizzle:studio": "pnpm --filter @evals/db db:studio"
1414
},
1515
"devDependencies": {
16-
"@dotenvx/dotenvx": "^1.39.0",
17-
"@eslint/js": "^9.22.0",
18-
"eslint": "^9.22.0",
16+
"@dotenvx/dotenvx": "^1.39.1",
17+
"@eslint/js": "^9.24.0",
18+
"eslint": "^9.24.0",
1919
"globals": "^16.0.0",
2020
"prettier": "^3.5.3",
2121
"tsx": "^4.19.3",
22-
"turbo": "^2.4.4",
23-
"typescript": "^5",
24-
"typescript-eslint": "^8.26.0"
22+
"turbo": "^2.5.0",
23+
"typescript": "^5.8.3",
24+
"typescript-eslint": "^8.29.1"
2525
}
2626
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
-- Adds a non-null integer `concurrency` column to `runs`, defaulting to 2.
ALTER TABLE `runs` ADD `concurrency` integer DEFAULT 2 NOT NULL;

0 commit comments

Comments
 (0)