Skip to content

Commit 4f476c8

Browse files
committed
More progress
1 parent a8ebf63 commit 4f476c8

File tree

12 files changed

+106
-414
lines changed

12 files changed

+106
-414
lines changed

apps/web-evals/package.json

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,6 @@
1010
"start": "next start"
1111
},
1212
"dependencies": {
13-
"@roo-code/evals": "workspace:^",
14-
"@roo-code/ipc": "workspace:^",
15-
"@roo-code/types": "workspace:^",
1613
"@hookform/resolvers": "^4.1.3",
1714
"@radix-ui/react-alert-dialog": "^1.1.7",
1815
"@radix-ui/react-dialog": "^1.1.6",
@@ -26,6 +23,9 @@
2623
"@radix-ui/react-slot": "^1.1.2",
2724
"@radix-ui/react-tabs": "^1.1.3",
2825
"@radix-ui/react-tooltip": "^1.1.8",
26+
"@roo-code/evals": "workspace:^",
27+
"@roo-code/ipc": "workspace:^",
28+
"@roo-code/types": "workspace:^",
2929
"@tanstack/react-query": "^5.69.0",
3030
"class-variance-authority": "^0.7.1",
3131
"clsx": "^2.1.1",
@@ -38,10 +38,10 @@
3838
"ps-tree": "^1.2.0",
3939
"react": "^18.3.1",
4040
"react-dom": "^18.3.1",
41-
"react-hook-form": "^7.54.2",
41+
"react-hook-form": "^7.57.0",
4242
"react-use": "^17.6.0",
43-
"sonner": "^2.0.2",
44-
"tailwind-merge": "^3.0.2",
43+
"sonner": "^2.0.5",
44+
"tailwind-merge": "^3.3.0",
4545
"tailwindcss-animate": "^1.0.7",
4646
"vaul": "^1.1.2",
4747
"zod": "^3.24.2"

packages/evals/package.json

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,9 @@
77
"scripts": {
88
"lint": "eslint src --ext=ts --max-warnings=0",
99
"check-types": "tsc --noEmit",
10-
"test": "dotenvx run -f .env.test -- vitest run",
10+
"_test": "dotenvx run -f .env.test -- vitest run",
1111
"clean": "rimraf dist .turbo",
12+
"cli": "dotenvx run -f .env.development -- tsx src/cli/index.ts",
1213
"drizzle-kit": "dotenvx run -f .env.development -- tsx node_modules/drizzle-kit/bin.cjs",
1314
"drizzle-kit:test": "dotenvx run -f .env.test -- tsx node_modules/drizzle-kit/bin.cjs",
1415
"db:start": "docker compose up -d",
@@ -24,9 +25,9 @@
2425
"@roo-code/ipc": "workspace:^",
2526
"@roo-code/types": "workspace:^",
2627
"better-sqlite3": "^11.10.0",
28+
"cmd-ts": "^0.13.0",
2729
"drizzle-orm": "^0.44.1",
28-
"execa": "^9.5.2",
29-
"gluegun": "^5.2.0",
30+
"execa": "^9.6.0",
3031
"node-ipc": "^12.0.0",
3132
"p-map": "^7.0.3",
3233
"p-wait-for": "^5.0.2",

packages/evals/scripts/setup.sh

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -323,8 +323,7 @@ if [[ ! -s .env ]]; then
323323
fi
324324

325325
echo -n "🗄️ Syncing Roo Code evals database... "
326-
pnpm --filter @roo-code/evals db:push &>/dev/null || exit 1
327-
pnpm --filter @roo-code/evals db:enable-wal &>/dev/null || exit 1
326+
pnpm --filter @roo-code/evals db:push --force &>/dev/null || exit 1
328327
echo "✅ Done"
329328

330329
if ! grep -q "OPENROUTER_API_KEY" .env; then

packages/evals/src/cli/index.ts

Lines changed: 15 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ import * as path from "path"
33

44
import pWaitFor from "p-wait-for"
55
import { execa, parseCommandString } from "execa"
6-
import { build, GluegunToolbox } from "gluegun"
6+
import { command, run, number, positional } from "cmd-ts"
77
import psTree from "ps-tree"
88

99
import { RooCodeEventName, IpcOrigin, IpcMessageType, TaskCommandName } from "@roo-code/types"
@@ -20,7 +20,7 @@ import {
2020
updateTaskMetrics,
2121
createToolError,
2222
} from "../db/index.js"
23-
import { __dirname, extensionDevelopmentPath, exercisesPath, type ExerciseLanguage } from "../exercises/index.js"
23+
import { type ExerciseLanguage, exercisesPath } from "../exercises/index.js"
2424

2525
type TaskResult = { success: boolean }
2626
type TaskPromise = Promise<TaskResult>
@@ -37,14 +37,7 @@ const testCommands: Record<ExerciseLanguage, { commands: string[]; timeout?: num
3737
rust: { commands: ["cargo test"] }, // timeout 15s bash -c "cd '$dir' && cargo test > /dev/null 2>&1"
3838
}
3939

40-
const run = async (toolbox: GluegunToolbox) => {
41-
const { config } = toolbox
42-
const id = config.runId ? Number(config.runId) : undefined
43-
44-
if (!id) {
45-
throw new Error("Run ID is required.")
46-
}
47-
40+
const runEvals = async (id: number) => {
4841
const run = await findRun(id)
4942
const tasks = await getTasks(run.id)
5043

@@ -425,43 +418,23 @@ const runUnitTest = async ({ task }: { task: Task }) => {
425418
}
426419

427420
const main = async () => {
428-
const cli = build()
429-
.brand("cli")
430-
.src(__dirname)
431-
.help()
432-
.version()
433-
.command({
434-
name: "run",
435-
description: "Run an eval",
436-
run: ({ config, parameters }) => {
437-
config.language = parameters.first
438-
config.exercise = parameters.second
439-
440-
if (parameters.options["runId"]) {
441-
config.runId = parameters.options["runId"]
442-
}
421+
const result = await run(
422+
command({
423+
name: "cli",
424+
description: "Execute an eval run.",
425+
version: "0.0.0",
426+
args: {
427+
runId: positional({ type: number, displayName: "runId" }),
443428
},
444-
})
445-
.defaultCommand()
446-
.create()
447-
448-
const toolbox = await cli.run(process.argv)
449-
const { command } = toolbox
450-
451-
switch (command?.name) {
452-
case "run":
453-
await run(toolbox)
454-
break
455-
}
429+
handler: (args) => runEvals(args.runId),
430+
}),
431+
process.argv.slice(2),
432+
)
456433

434+
console.log(result)
457435
process.exit(0)
458436
}
459437

460-
if (!fs.existsSync(extensionDevelopmentPath)) {
461-
console.error(`"extensionDevelopmentPath" does not exist.`)
462-
process.exit(1)
463-
}
464-
465438
if (!fs.existsSync(exercisesPath)) {
466439
console.error(
467440
`Exercises do not exist at ${exercisesPath}. Please run "git clone https://github.com/RooCodeInc/Roo-Code-Evals.git evals".`,

packages/evals/src/db/migrations/meta/0000_snapshot.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
{
2-
"id": "caee25bc-e9ae-4d17-8448-11b879c8b66d",
2+
"id": "b50d5e6a-0f3f-4605-a5e7-9351711fc5e4",
33
"prevId": "00000000-0000-0000-0000-000000000000",
44
"version": "7",
55
"dialect": "postgresql",

packages/evals/src/db/migrations/meta/_journal.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
{
66
"idx": 0,
77
"version": "7",
8-
"when": 1748933185613,
9-
"tag": "0000_old_gorilla_man",
8+
"when": 1748937674449,
9+
"tag": "0000_young_trauma",
1010
"breakpoints": true
1111
}
1212
]

packages/evals/src/db/schema.ts

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
import { pgTable, text, timestamp, integer, real, boolean, jsonb, uniqueIndex } from "drizzle-orm/pg-core"
22
import { relations } from "drizzle-orm"
33

4-
import { type RooCodeSettings, ToolName, type ToolUsage, toolNames } from "@roo-code/types"
4+
import type { RooCodeSettings, ToolName, ToolUsage } from "@roo-code/types"
55

6-
import { type ExerciseLanguage, exerciseLanguages } from "../exercises/index.js"
6+
import type { ExerciseLanguage } from "../exercises/index.js"
77

88
/**
99
* runs
@@ -45,7 +45,7 @@ export const tasks = pgTable(
4545
.references(() => runs.id)
4646
.notNull(),
4747
taskMetricsId: integer("task_metrics_id").references(() => taskMetrics.id),
48-
language: text({ enum: exerciseLanguages }).notNull().$type<ExerciseLanguage>(),
48+
language: text().notNull().$type<ExerciseLanguage>(),
4949
exercise: text().notNull(),
5050
passed: boolean(),
5151
startedAt: timestamp("started_at"),
@@ -97,7 +97,7 @@ export const toolErrors = pgTable("toolErrors", {
9797
id: integer().primaryKey().generatedAlwaysAsIdentity(),
9898
runId: integer("run_id").references(() => runs.id),
9999
taskId: integer("task_id").references(() => tasks.id),
100-
toolName: text("tool_name", { enum: toolNames }).notNull().$type<ToolName>(),
100+
toolName: text("tool_name").notNull().$type<ToolName>(),
101101
error: text().notNull(),
102102
createdAt: timestamp("created_at").notNull(),
103103
})

packages/evals/src/exercises/exercises.ts

Lines changed: 0 additions & 36 deletions
This file was deleted.

packages/evals/src/exercises/index.ts

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,25 @@
1-
export * from "./exercises.js"
2-
export * from "./paths.js"
1+
import * as path from "path"
2+
import * as fs from "fs/promises"
3+
import { fileURLToPath } from "url"
4+
5+
const __dirname = path.dirname(fileURLToPath(import.meta.url))
6+
7+
export const exercisesPath = path.resolve(__dirname, "..", "..", "..", "..", "..", "evals")
8+
9+
export const exerciseLanguages = ["go", "java", "javascript", "python", "rust"] as const
10+
11+
export type ExerciseLanguage = (typeof exerciseLanguages)[number]
12+
13+
const listDirectories = async (relativePath: string) => {
14+
try {
15+
const targetPath = path.resolve(__dirname, relativePath)
16+
const entries = await fs.readdir(targetPath, { withFileTypes: true })
17+
return entries.filter((entry) => entry.isDirectory() && !entry.name.startsWith(".")).map((entry) => entry.name)
18+
} catch (error) {
19+
console.error(`Error listing directories at ${relativePath}:`, error)
20+
return []
21+
}
22+
}
23+
24+
export const getExercisesForLanguage = async (language: ExerciseLanguage) =>
25+
listDirectories(path.join(exercisesPath, language))

0 commit comments

Comments
 (0)