Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions evals/apps/cli/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ import { getExercises } from "./exercises.js"
type TaskResult = { success: boolean; retry: boolean }
type TaskPromise = Promise<TaskResult>

const MAX_CONCURRENCY = 5
const TASK_TIMEOUT = 10 * 60 * 1_000
const UNIT_TEST_TIMEOUT = 60 * 1_000

Expand Down Expand Up @@ -78,12 +77,14 @@ const run = async (toolbox: GluegunToolbox) => {
const exercises = getExercises()[language as ExerciseLanguage]

await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), {
concurrency: 10,
concurrency: run.concurrency,
})
}
} else if (exercise === "all") {
const exercises = getExercises()[language as ExerciseLanguage]
await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), { concurrency: 10 })
await pMap(exercises, (exercise) => createTask({ runId: run.id, language, exercise }), {
concurrency: run.concurrency,
})
} else {
language = language || (await askLanguage(prompt))
exercise = exercise || (await askExercise(prompt, language))
Expand Down Expand Up @@ -145,13 +146,14 @@ const run = async (toolbox: GluegunToolbox) => {
}

let delay = 0

for (const task of tasks) {
const promise = processTask(task, delay)
delay = delay + 5_000
runningPromises.push(promise)
promise.then(() => processTaskResult(task, promise))

if (runningPromises.length >= MAX_CONCURRENCY) {
if (runningPromises.length >= run.concurrency) {
delay = 0
await Promise.race(runningPromises)
}
Expand Down Expand Up @@ -179,7 +181,7 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
// subprocess.stdout.pipe(process.stdout)

// Sleep for a random amount of time before opening a new VSCode window.
await new Promise((resolve) => setTimeout(resolve, 1_000 + Math.random() * MAX_CONCURRENCY * 1_000))
await new Promise((resolve) => setTimeout(resolve, 1_000 + Math.random() * 5_000))
console.log(`Opening new VS Code window at ${workspacePath}`)

await execa({
Expand Down
1 change: 1 addition & 0 deletions evals/apps/web/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"@radix-ui/react-scroll-area": "^1.2.3",
"@radix-ui/react-select": "^2.1.6",
"@radix-ui/react-separator": "^1.1.2",
"@radix-ui/react-slider": "^1.2.4",
"@radix-ui/react-slot": "^1.1.2",
"@radix-ui/react-tabs": "^1.1.3",
"@radix-ui/react-tooltip": "^1.1.8",
Expand Down
35 changes: 33 additions & 2 deletions evals/apps/web/src/app/runs/new/new-run.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,13 @@ import { X, Rocket, Check, ChevronsUpDown, HardDriveUpload, CircleCheck } from "
import { globalSettingsSchema, providerSettingsSchema, rooCodeDefaults } from "@evals/types"

import { createRun } from "@/lib/server/runs"
import { createRunSchema as formSchema, type CreateRun as FormValues } from "@/lib/schemas"
import {
createRunSchema as formSchema,
type CreateRun as FormValues,
CONCURRENCY_MIN,
CONCURRENCY_MAX,
CONCURRENCY_DEFAULT,
} from "@/lib/schemas"
import { cn } from "@/lib/utils"
import { useOpenRouterModels } from "@/hooks/use-open-router-models"
import { useExercises } from "@/hooks/use-exercises"
Expand All @@ -38,6 +44,7 @@ import {
PopoverContent,
PopoverTrigger,
ScrollArea,
Slider,
} from "@/components/ui"

import { SettingsDiff } from "./settings-diff"
Expand All @@ -63,6 +70,7 @@ export function NewRun() {
suite: "full",
exercises: [],
settings: undefined,
concurrency: CONCURRENCY_DEFAULT,
},
})

Expand All @@ -73,7 +81,7 @@ export function NewRun() {
formState: { isSubmitting },
} = form

const [model, suite, settings] = watch(["model", "suite", "settings"])
const [model, suite, settings] = watch(["model", "suite", "settings", "concurrency"])

const onSubmit = useCallback(
async (values: FormValues) => {
Expand Down Expand Up @@ -288,6 +296,29 @@ export function NewRun() {
)}
/>

<FormField
control={form.control}
name="concurrency"
render={({ field }) => (
<FormItem>
<FormLabel>Concurrency</FormLabel>
<FormControl>
<div className="flex flex-row items-center gap-2">
<Slider
defaultValue={[field.value]}
min={CONCURRENCY_MIN}
max={CONCURRENCY_MAX}
step={1}
onValueChange={(value) => field.onChange(value[0])}
/>
<div>{field.value}</div>
</div>
</FormControl>
<FormMessage />
</FormItem>
)}
/>

<FormField
control={form.control}
name="description"
Expand Down
1 change: 1 addition & 0 deletions evals/apps/web/src/components/ui/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export * from "./popover"
export * from "./scroll-area"
export * from "./select"
export * from "./separator"
export * from "./slider"
export * from "./sonner"
export * from "./table"
export * from "./tabs"
Expand Down
56 changes: 56 additions & 0 deletions evals/apps/web/src/components/ui/slider.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"use client"

import * as React from "react"
import * as SliderPrimitive from "@radix-ui/react-slider"

import { cn } from "@/lib/utils"

/**
 * Styled wrapper around the Radix `Slider` primitive.
 *
 * Accepts every prop of `SliderPrimitive.Root`. One thumb is rendered per
 * entry of `value` (controlled usage) or, when `value` is not an array, per
 * entry of `defaultValue` (uncontrolled usage). Remaining props are spread
 * onto the root last, so they can override the attributes set here.
 *
 * @param className - Extra classes merged with the root's base classes via `cn`.
 * @param defaultValue - Initial thumb values for uncontrolled usage.
 * @param value - Current thumb values for controlled usage.
 * @param min - Lower bound forwarded to the root; defaults to `0`.
 * @param max - Upper bound forwarded to the root; defaults to `100`.
 */
function Slider({
  className,
  defaultValue,
  value,
  min = 0,
  max = 100,
  ...props
}: React.ComponentProps<typeof SliderPrimitive.Root>) {
  // Decide how many thumbs to render. When neither `value` nor `defaultValue`
  // is supplied, the root below receives `undefined` for both and Radix falls
  // back to a single value (`[min]`), so exactly one thumb is rendered here.
  // A `[min, max]` fallback would render a second thumb with no backing value.
  const thumbValues = React.useMemo(
    () => (Array.isArray(value) ? value : Array.isArray(defaultValue) ? defaultValue : [min]),
    [value, defaultValue, min],
  )

  return (
    <SliderPrimitive.Root
      data-slot="slider"
      defaultValue={defaultValue}
      value={value}
      min={min}
      max={max}
      className={cn(
        "relative flex w-full touch-none items-center select-none data-[disabled]:opacity-50 data-[orientation=vertical]:h-full data-[orientation=vertical]:min-h-44 data-[orientation=vertical]:w-auto data-[orientation=vertical]:flex-col",
        className,
      )}
      {...props}>
      <SliderPrimitive.Track
        data-slot="slider-track"
        className={cn(
          "bg-muted relative grow overflow-hidden rounded-full data-[orientation=horizontal]:h-1.5 data-[orientation=horizontal]:w-full data-[orientation=vertical]:h-full data-[orientation=vertical]:w-1.5",
        )}>
        <SliderPrimitive.Range
          data-slot="slider-range"
          className={cn(
            "bg-primary absolute data-[orientation=horizontal]:h-full data-[orientation=vertical]:w-full",
          )}
        />
      </SliderPrimitive.Track>
      {/* One thumb per tracked value; the index is a stable key because thumbs are never reordered here. */}
      {Array.from({ length: thumbValues.length }, (_, index) => (
        <SliderPrimitive.Thumb
          data-slot="slider-thumb"
          key={index}
          className="border-primary bg-accent block size-4 shrink-0 rounded-full border shadow-sm transition-[color,box-shadow] focus-visible:outline-hidden disabled:pointer-events-none disabled:opacity-50 cursor-pointer"
        />
      ))}
    </SliderPrimitive.Root>
  )
}

export { Slider }
5 changes: 5 additions & 0 deletions evals/apps/web/src/lib/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,18 @@ import { rooCodeSettingsSchema } from "@evals/types"
* CreateRun
*/

/** Smallest `concurrency` value accepted by `createRunSchema`. */
export const CONCURRENCY_MIN = 1
/** Largest `concurrency` value accepted by `createRunSchema`. */
export const CONCURRENCY_MAX = 25
/** `concurrency` value `createRunSchema` applies when the field is omitted. */
export const CONCURRENCY_DEFAULT = 2

export const createRunSchema = z
.object({
model: z.string().min(1, { message: "Model is required." }),
description: z.string().optional(),
suite: z.enum(["full", "partial"]),
exercises: z.array(z.string()).optional(),
settings: rooCodeSettingsSchema.optional(),
concurrency: z.number().int().min(CONCURRENCY_MIN).max(CONCURRENCY_MAX).default(CONCURRENCY_DEFAULT),
})
.refine((data) => data.suite === "full" || (data.exercises || []).length > 0, {
message: "Exercises are required when running a partial suite.",
Expand Down
12 changes: 6 additions & 6 deletions evals/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
"drizzle:studio": "pnpm --filter @evals/db db:studio"
},
"devDependencies": {
"@dotenvx/dotenvx": "^1.39.0",
"@eslint/js": "^9.22.0",
"eslint": "^9.22.0",
"@dotenvx/dotenvx": "^1.39.1",
"@eslint/js": "^9.24.0",
"eslint": "^9.24.0",
"globals": "^16.0.0",
"prettier": "^3.5.3",
"tsx": "^4.19.3",
"turbo": "^2.4.4",
"typescript": "^5",
"typescript-eslint": "^8.26.0"
"turbo": "^2.5.0",
"typescript": "^5.8.3",
"typescript-eslint": "^8.29.1"
}
}
1 change: 1 addition & 0 deletions evals/packages/db/drizzle/0002_white_flatman.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ALTER TABLE `runs` ADD `concurrency` integer DEFAULT 2 NOT NULL;
Loading