From 431a395ef1f6ad746b8d891d56d604e2ce1fe492 Mon Sep 17 00:00:00 2001 From: cte Date: Sun, 13 Apr 2025 00:49:51 -0700 Subject: [PATCH 1/2] Evals improvements --- evals/apps/cli/package.json | 6 +- evals/apps/cli/src/index.ts | 111 ++++++++++++------ evals/apps/web/src/app/runs/[id]/run.tsx | 97 +++++++-------- .../web/src/app/runs/[id]/task-status.tsx | 7 +- evals/apps/web/src/app/runs/new/new-run.tsx | 14 ++- evals/apps/web/src/hooks/use-process-tree.ts | 1 + evals/apps/web/src/hooks/use-run-status.ts | 43 ++++--- evals/packages/db/src/schema.ts | 4 +- evals/packages/ipc/src/client.ts | 2 +- evals/packages/ipc/src/server.ts | 2 +- evals/packages/types/src/roo-code-defaults.ts | 32 ++--- evals/packages/types/src/roo-code.ts | 14 +++ evals/pnpm-lock.yaml | 6 + evals/scripts/setup.sh | 33 ++++-- 14 files changed, 215 insertions(+), 157 deletions(-) diff --git a/evals/apps/cli/package.json b/evals/apps/cli/package.json index 3e7da0266e9..1b54765954c 100644 --- a/evals/apps/cli/package.json +++ b/evals/apps/cli/package.json @@ -16,10 +16,12 @@ "execa": "^9.5.2", "gluegun": "^5.1.2", "p-map": "^7.0.3", - "p-wait-for": "^5.0.2" + "p-wait-for": "^5.0.2", + "ps-tree": "^1.2.0" }, "devDependencies": { "@evals/eslint-config": "workspace:^", - "@evals/typescript-config": "workspace:^" + "@evals/typescript-config": "workspace:^", + "@types/ps-tree": "^1.1.6" } } diff --git a/evals/apps/cli/src/index.ts b/evals/apps/cli/src/index.ts index 55474f15f86..2491b16ef68 100644 --- a/evals/apps/cli/src/index.ts +++ b/evals/apps/cli/src/index.ts @@ -6,6 +6,7 @@ import pMap from "p-map" import pWaitFor from "p-wait-for" import { execa, parseCommandString } from "execa" import { build, filesystem, GluegunPrompt, GluegunToolbox } from "gluegun" +import psTree from "ps-tree" import { type ExerciseLanguage, @@ -36,8 +37,9 @@ import { getExercises } from "./exercises.js" type TaskResult = { success: boolean; retry: boolean } type TaskPromise = Promise -const TASK_TIMEOUT = 10 * 60 * 1_000 -const UNIT_TEST_TIMEOUT = 60 * 1_000 +const TASK_START_DELAY = 10 * 1_000 +const TASK_TIMEOUT = 5 * 60 * 1_000 +const UNIT_TEST_TIMEOUT = 2 * 60 * 1_000 const testCommands: Record = { go: { commands: ["go test"] }, // timeout 15s bash -c "cd '$dir' && go test > /dev/null 2>&1" @@ -98,13 +100,11 @@ const run = async (toolbox: GluegunToolbox) => { throw new Error("No tasks found.") } - console.log(await execa({ cwd: exercisesPath })`git config user.name "Roo Code"`) - console.log(await execa({ cwd: exercisesPath })`git config user.email "support@roocode.com"`) - console.log(await execa({ cwd: exercisesPath })`git checkout -f`) - console.log(await execa({ cwd: exercisesPath })`git clean -fd`) - console.log( - await execa({ cwd: exercisesPath })`git checkout -b runs/${run.id}-${crypto.randomUUID().slice(0, 8)} main`, - ) + await execa({ cwd: exercisesPath })`git config user.name "Roo Code"` + await execa({ cwd: exercisesPath })`git config user.email "support@roocode.com"` + await execa({ cwd: exercisesPath })`git checkout -f` + await execa({ cwd: exercisesPath })`git clean -fd` + await execa({ cwd: exercisesPath })`git checkout -b runs/${run.id}-${crypto.randomUUID().slice(0, 8)} main` fs.writeFileSync( path.resolve(exercisesPath, "settings.json"), @@ -145,11 +145,11 @@ const run = async (toolbox: GluegunToolbox) => { } } - let delay = 0 + let delay = TASK_START_DELAY for (const task of tasks) { const promise = processTask(task, delay) - delay = delay + 5_000 + delay = delay + TASK_START_DELAY runningPromises.push(promise) promise.then(() => processTaskResult(task, promise)) @@ -162,10 +162,10 @@ const run = async (toolbox: GluegunToolbox) => { await Promise.all(runningPromises) const result = await finishRun(run.id) - console.log("[cli#run]", result) + console.log(`${Date.now()} [cli#run]`, result) - console.log(await execa({ cwd: exercisesPath })`git add .`) - console.log(await execa({ cwd: exercisesPath })`git commit -m ${`Run #${run.id}`} --no-verify`) + await execa({ cwd: exercisesPath })`git add .` + await execa({ cwd: exercisesPath })`git commit -m ${`Run #${run.id}`} --no-verify` } const runExercise = async ({ run, task, server }: { run: Run; task: Task; server: IpcServer }): TaskPromise => { @@ -180,9 +180,7 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server // Don't await execa and store result as subprocess. // subprocess.stdout.pipe(process.stdout) - // Sleep for a random amount of time before opening a new VSCode window. - await new Promise((resolve) => setTimeout(resolve, 1_000 + Math.random() * 5_000)) - console.log(`Opening new VS Code window at ${workspacePath}`) + console.log(`${Date.now()} [cli#runExercise] Opening new VS Code window at ${workspacePath}`) await execa({ env: { @@ -192,15 +190,15 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server })`code --disable-workspace-trust -n ${workspacePath}` // Give VSCode some time to spawn before connecting to its unix socket. - await new Promise((resolve) => setTimeout(resolve, 1_000 + Math.random() * 4_000)) - console.log(`Connecting to ${taskSocketPath}`) + await new Promise((resolve) => setTimeout(resolve, 3_000)) + console.log(`${Date.now()} [cli#runExercise] Connecting to ${taskSocketPath}`) const client = new IpcClient(taskSocketPath) try { await pWaitFor(() => client.isReady, { interval: 250, timeout: 5_000 }) // eslint-disable-next-line @typescript-eslint/no-unused-vars } catch (error) { - console.log(`[cli#runExercise | ${language} / ${exercise}] unable to connect`) + console.log(`${Date.now()} [cli#runExercise | ${language} / ${exercise}] unable to connect`) client.disconnect() return { success: false, retry: false } } @@ -220,16 +218,20 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server client.on(IpcMessageType.TaskEvent, async (taskEvent) => { const { eventName, payload } = taskEvent - server.broadcast({ - type: IpcMessageType.TaskEvent, - origin: IpcOrigin.Server, - relayClientId: client.clientId!, - data: { ...taskEvent, taskId: task.id }, - }) + if (taskEvent.eventName !== RooCodeEventName.Message) { + server.broadcast({ + type: IpcMessageType.TaskEvent, + origin: IpcOrigin.Server, + relayClientId: client.clientId!, + data: { ...taskEvent, taskId: task.id }, + }) + } if (!ignoreEvents.includes(eventName)) { - console.log(`[cli#runExercise | ${language} / ${exercise}] taskEvent -> ${eventName}`) - console.log(payload) + console.log( + `${Date.now()} [cli#runExercise | ${language} / ${exercise}] taskEvent -> ${eventName}`, + payload, + ) } if (eventName === RooCodeEventName.TaskStarted) { @@ -279,11 +281,11 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server }) client.on(IpcMessageType.Disconnect, async () => { - console.log(`[cli#runExercise | ${language} / ${exercise}] disconnect`) + console.log(`${Date.now()} [cli#runExercise | ${language} / ${exercise}] disconnect`) isClientDisconnected = true }) - console.log(`[cli#runExercise | ${language} / ${exercise}] starting task`) + console.log(`${Date.now()} [cli#runExercise | ${language} / ${exercise}] starting task`) client.sendMessage({ type: IpcMessageType.TaskCommand, @@ -307,7 +309,7 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server await pWaitFor(() => !!taskFinishedAt || isClientDisconnected, { interval: 1_000, timeout: TASK_TIMEOUT }) // eslint-disable-next-line @typescript-eslint/no-unused-vars } catch (error) { - console.log(`[cli#runExercise | ${language} / ${exercise}] time limit reached`) + console.log(`${Date.now()} [cli#runExercise | ${language} / ${exercise}] time limit reached`) // Cancel the task. if (rooTaskId && !isClientDisconnected) { @@ -351,17 +353,56 @@ const runUnitTest = async ({ task }: { task: Task }) => { let passed = true for (const command of commands) { - const timeout = cmd.timeout ?? UNIT_TEST_TIMEOUT - try { - const result = await execa({ cwd, shell: true, reject: false, timeout })`${command}` + console.log( + `${Date.now()} [cli#runUnitTest | ${task.language} / ${task.exercise}] running "${command.join(" ")}"`, + ) + const subprocess = execa({ cwd, shell: true, reject: false })`${command}` + + const timeout = setTimeout(async () => { + const descendants = await new Promise((resolve, reject) => { + psTree(subprocess.pid!, (err, children) => { + if (err) { + reject(err) + } + + resolve(children.map((p) => parseInt(p.PID))) + }) + }) + + if (descendants.length > 0) { + try { + console.log( + `${Date.now()} [cli#runUnitTest | ${task.language} / ${task.exercise}] killing ${descendants.join(" ")}`, + ) + + await execa`kill -9 ${descendants.join(" ")}` + } catch (error) { + console.error("Error killing descendant processes:", error) + } + } + + console.log( + `${Date.now()} [cli#runUnitTest | ${task.language} / ${task.exercise}] killing ${subprocess.pid}`, + ) + + await execa`kill -9 ${subprocess.pid!}` + }, UNIT_TEST_TIMEOUT) + + const result = await subprocess + + console.log( + `${Date.now()} [cli#runUnitTest | ${task.language} / ${task.exercise}] "${command.join(" ")}" result -> ${JSON.stringify(result)}`, + ) + + clearTimeout(timeout) if (result.failed) { passed = false break } } catch (error) { - console.log("[cli#runUnitTest]", error) + console.log(`${Date.now()} [cli#runUnitTest | ${task.language} / ${task.exercise}]`, error) passed = false break } diff --git a/evals/apps/web/src/app/runs/[id]/run.tsx b/evals/apps/web/src/app/runs/[id]/run.tsx index f9e1ac9f62d..84749fc9160 100644 --- a/evals/apps/web/src/app/runs/[id]/run.tsx +++ b/evals/apps/web/src/app/runs/[id]/run.tsx @@ -1,33 +1,44 @@ "use client" -import { useState, useRef } from "react" -import { LoaderCircle, SquareTerminal } from "lucide-react" +import { useMemo } from "react" +import { LoaderCircle } from "lucide-react" import * as db from "@evals/db" import { formatCurrency, formatDuration, formatTokens } from "@/lib" import { useRunStatus } from "@/hooks/use-run-status" -import { - Drawer, - DrawerContent, - DrawerHeader, - DrawerTitle, - ScrollArea, - Table, - TableBody, - TableCell, - TableHead, - TableHeader, - TableRow, -} from "@/components/ui" +import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui" import { TaskStatus } from "./task-status" import { ConnectionStatus } from "./connection-status" +type TaskMetrics = Pick + export function Run({ run }: { run: db.Run }) { - const { tasks, status, output, outputCounts } = useRunStatus(run) - const scrollAreaRef = useRef(null) - const [selectedTask, setSelectedTask] = useState() + const { tasks, status, tokenUsage, usageUpdatedAt } = useRunStatus(run) + + const taskMetrics: Record = useMemo(() => { + const metrics: Record = {} + + tasks?.forEach((task) => { + const usage = tokenUsage.get(task.id) + + if (task.finishedAt && task.taskMetrics) { + metrics[task.id] = task.taskMetrics + } else if (usage) { + metrics[task.id] = { + tokensIn: usage.totalTokensIn, + tokensOut: usage.totalTokensOut, + tokensContext: usage.contextTokens, + duration: usage.duration ?? 0, + cost: usage.totalCost, + } + } + }) + + return metrics + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [tasks, tokenUsage, usageUpdatedAt]) return ( <> @@ -57,38 +68,33 @@ export function Run({ run }: { run: db.Run }) {
- +
{task.language}/{task.exercise}
- {(outputCounts[task.id] ?? 0) > 0 && ( -
setSelectedTask(task)}> - -
- {outputCounts[task.id]} -
-
- )}
- {task.taskMetrics ? ( + {taskMetrics[task.id] ? ( <>
-
{formatTokens(task.taskMetrics.tokensIn)}
/ -
{formatTokens(task.taskMetrics.tokensOut)}
+
{formatTokens(taskMetrics[task.id]!.tokensIn)}
/ +
{formatTokens(taskMetrics[task.id]!.tokensOut)}
- {formatTokens(task.taskMetrics.tokensContext)} + {formatTokens(taskMetrics[task.id]!.tokensContext)} - {formatDuration(task.taskMetrics.duration)} + {taskMetrics[task.id]!.duration + ? formatDuration(taskMetrics[task.id]!.duration) + : "-"} - {formatCurrency(task.taskMetrics.cost)} + {formatCurrency(taskMetrics[task.id]!.cost)} ) : ( @@ -100,27 +106,6 @@ export function Run({ run }: { run: db.Run }) { )} - setSelectedTask(undefined)}> - -
- - - {selectedTask?.language}/{selectedTask?.exercise} - - -
- {selectedTask && ( - -
-

Tags

- {output.get(selectedTask.id)?.map((line, i) =>
{line}
)} -
-
- )} -
-
-
-
) } diff --git a/evals/apps/web/src/app/runs/[id]/task-status.tsx b/evals/apps/web/src/app/runs/[id]/task-status.tsx index 0c2ae4205d5..2e0b28b419f 100644 --- a/evals/apps/web/src/app/runs/[id]/task-status.tsx +++ b/evals/apps/web/src/app/runs/[id]/task-status.tsx @@ -4,16 +4,15 @@ import { type Task } from "@evals/db" type TaskStatusProps = { task: Task + running: boolean } -export const TaskStatus = ({ task }: TaskStatusProps) => { +export const TaskStatus = ({ task, running }: TaskStatusProps) => { return task.passed === false ? ( ) : task.passed === true ? ( - ) : task.startedAt ? ( - - ) : task.finishedAt ? ( + ) : running ? ( ) : ( diff --git a/evals/apps/web/src/app/runs/new/new-run.tsx b/evals/apps/web/src/app/runs/new/new-run.tsx index 247441264a1..ad3f9d7228f 100644 --- a/evals/apps/web/src/app/runs/new/new-run.tsx +++ b/evals/apps/web/src/app/runs/new/new-run.tsx @@ -86,13 +86,25 @@ export function NewRun() { const onSubmit = useCallback( async (values: FormValues) => { try { + if (mode === "openrouter") { + const openRouterModel = models.data?.find(({ id }) => id === model) + + if (!openRouterModel) { + throw new Error("Model not found.") + } + + const openRouterModelId = openRouterModel.id + const openRouterModelInfo = openRouterModel.modelInfo + values.settings = { ...(values.settings || {}), openRouterModelId, openRouterModelInfo } + } + const { id } = await createRun(values) router.push(`/runs/${id}`) } catch (e) { toast.error(e instanceof Error ? e.message : "An unknown error occurred.") } }, - [router], + [mode, model, models.data, router], ) const onFilterModels = useCallback( diff --git a/evals/apps/web/src/hooks/use-process-tree.ts b/evals/apps/web/src/hooks/use-process-tree.ts index 834e815f10c..35d7e7ce044 100644 --- a/evals/apps/web/src/hooks/use-process-tree.ts +++ b/evals/apps/web/src/hooks/use-process-tree.ts @@ -7,4 +7,5 @@ export const useProcessList = (pid: number | null) => queryKey: ["process-tree", pid], queryFn: () => (pid ? getProcessList(pid) : []), enabled: !!pid, + refetchInterval: 30_000, }) diff --git a/evals/apps/web/src/hooks/use-run-status.ts b/evals/apps/web/src/hooks/use-run-status.ts index a699dce38ee..3278e69e7a2 100644 --- a/evals/apps/web/src/hooks/use-run-status.ts +++ b/evals/apps/web/src/hooks/use-run-status.ts @@ -1,7 +1,7 @@ import { useState, useCallback, useRef } from "react" import { useQuery, keepPreviousData } from "@tanstack/react-query" -import { RooCodeEventName, taskEventSchema } from "@evals/types" +import { RooCodeEventName, taskEventSchema, TokenUsage } from "@evals/types" import { Run } from "@evals/db" import { getTasks } from "@/lib/server/tasks" @@ -9,14 +9,16 @@ import { useEventSource } from "@/hooks/use-event-source" export const useRunStatus = (run: Run) => { const [tasksUpdatedAt, setTasksUpdatedAt] = useState() - const outputRef = useRef>(new Map()) - const [outputCounts, setOutputCounts] = useState>({}) + const [usageUpdatedAt, setUsageUpdatedAt] = useState() + + const tokenUsage = useRef>(new Map()) + const startTimes = useRef>(new Map()) const { data: tasks } = useQuery({ queryKey: ["run", run.id, tasksUpdatedAt], queryFn: async () => getTasks(run.id), placeholderData: keepPreviousData, - refetchInterval: 10_000, + refetchInterval: 30_000, }) const url = `/api/runs/${run.id}/stream` @@ -47,28 +49,18 @@ export const useRunStatus = (run: Run) => { switch (eventName) { case RooCodeEventName.TaskStarted: + startTimes.current.set(taskId, Date.now()) + break case RooCodeEventName.TaskCompleted: case RooCodeEventName.TaskAborted: setTasksUpdatedAt(Date.now()) break - case RooCodeEventName.Message: { - const [ - { - message: { text }, - }, - ] = payload - - if (text) { - outputRef.current.set(taskId, [...(outputRef.current.get(taskId) || []), text]) - const outputCounts: Record = {} - - for (const [taskId, messages] of outputRef.current.entries()) { - outputCounts[taskId] = messages.length - } - - setOutputCounts(outputCounts) - } - + case RooCodeEventName.TaskTokenUsageUpdated: { + console.log("taskTokenUsageUpdated", payload) + const startTime = startTimes.current.get(taskId) + const duration = startTime ? Date.now() - startTime : undefined + tokenUsage.current.set(taskId, { ...payload[1], duration }) + setUsageUpdatedAt(Date.now()) break } } @@ -76,5 +68,10 @@ export const useRunStatus = (run: Run) => { const status = useEventSource({ url, onMessage }) - return { tasks, status, output: outputRef.current, outputCounts } + return { + status, + tasks, + tokenUsage: tokenUsage.current, + usageUpdatedAt, + } } diff --git a/evals/packages/db/src/schema.ts b/evals/packages/db/src/schema.ts index eb19de9fc09..522d5999fb1 100644 --- a/evals/packages/db/src/schema.ts +++ b/evals/packages/db/src/schema.ts @@ -2,7 +2,7 @@ import { sqliteTable, text, real, integer, blob, uniqueIndex } from "drizzle-orm import { relations } from "drizzle-orm" import { createInsertSchema } from "drizzle-zod" -import { GlobalSettings, exerciseLanguages, rooCodeSettingsSchema } from "@evals/types" +import { GlobalSettings, RooCodeSettings, exerciseLanguages, rooCodeSettingsSchema } from "@evals/types" /** * runs @@ -13,7 +13,7 @@ export const runs = sqliteTable("runs", { taskMetricsId: integer({ mode: "number" }).references(() => taskMetrics.id), model: text().notNull(), description: text(), - settings: blob({ mode: "json" }).$type(), + settings: blob({ mode: "json" }).$type(), pid: integer({ mode: "number" }), socketPath: text().notNull(), concurrency: integer({ mode: "number" }).default(2).notNull(), diff --git a/evals/packages/ipc/src/client.ts b/evals/packages/ipc/src/client.ts index 8b9c4c4b4b4..91e6b06cd0e 100644 --- a/evals/packages/ipc/src/client.ts +++ b/evals/packages/ipc/src/client.ts @@ -65,7 +65,7 @@ export class IpcClient extends EventEmitter { const result = ipcMessageSchema.safeParse(data) if (!result.success) { - this.log("[client#onMessage] invalid payload", data) + this.log("[client#onMessage] invalid payload", result.error, data) return } diff --git a/evals/packages/ipc/src/server.ts b/evals/packages/ipc/src/server.ts index e4c0138566f..cbd9cf930d9 100644 --- a/evals/packages/ipc/src/server.ts +++ b/evals/packages/ipc/src/server.ts @@ -83,7 +83,7 @@ export class IpcServer extends EventEmitter { const result = ipcMessageSchema.safeParse(data) if (!result.success) { - this.log("[server#onMessage] invalid payload", result.error) + this.log("[server#onMessage] invalid payload", result.error, data) return } diff --git a/evals/packages/types/src/roo-code-defaults.ts b/evals/packages/types/src/roo-code-defaults.ts index f126f33ff0e..dd7ff85775c 100644 --- a/evals/packages/types/src/roo-code-defaults.ts +++ b/evals/packages/types/src/roo-code-defaults.ts @@ -2,25 +2,9 @@ import { RooCodeSettings } from "./roo-code.js" export const rooCodeDefaults: RooCodeSettings = { apiProvider: "openrouter", - openRouterModelId: "google/gemini-2.0-flash-001", // "anthropic/claude-3.7-sonnet", + openRouterUseMiddleOutTransform: false, - // apiProvider: "openai", - // openAiBaseUrl: "http://hrudolph.duckdns.org:4269/api/v1", - // openAiApiKey: process.env.OPENAI_API_KEY, - // openAiModelId: "models/gemini-2.5-pro-exp-03-25", - // openAiCustomModelInfo: { - // maxTokens: 65536, - // contextWindow: 1000000, - // supportsImages: true, - // supportsPromptCache: false, - // inputPrice: 0, - // outputPrice: 0, - // description: - // "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", - // thinking: false, - // }, - - modelTemperature: null, + // modelTemperature: null, // reasoningEffort: "high", pinnedApiConfigs: {}, @@ -60,12 +44,18 @@ export const rooCodeDefaults: RooCodeSettings = { maxReadFileLine: 500, terminalOutputLineLimit: 500, - terminalShellIntegrationTimeout: 15000, + terminalShellIntegrationTimeout: 30_000, + // terminalCommandDelay: 0, + // terminalPowershellCounter: false, + // terminalZshClearEolMark: true, + // terminalZshOhMy: true, + // terminalZshP10k: false, + // terminalZdotdir: true, - diffEnabled: true, + diffEnabled: false, fuzzyMatchThreshold: 1.0, experiments: { - search_and_replace: true, + search_and_replace: false, insert_content: false, powerSteering: false, }, diff --git a/evals/packages/types/src/roo-code.ts b/evals/packages/types/src/roo-code.ts index 5a4082395b8..7c982f29446 100644 --- a/evals/packages/types/src/roo-code.ts +++ b/evals/packages/types/src/roo-code.ts @@ -396,6 +396,7 @@ const providerSettingsRecord: ProviderSettingsRecord = { apiModelId: undefined, apiKey: undefined, anthropicBaseUrl: undefined, + anthropicUseAuthToken: undefined, // Glama glamaModelId: undefined, glamaModelInfo: undefined, @@ -523,6 +524,12 @@ export const globalSettingsSchema = z.object({ terminalOutputLineLimit: z.number().optional(), terminalShellIntegrationTimeout: z.number().optional(), + terminalCommandDelay: z.number().optional(), + terminalPowershellCounter: z.boolean().optional(), + terminalZshClearEolMark: z.boolean().optional(), + terminalZshOhMy: z.boolean().optional(), + terminalZshP10k: z.boolean().optional(), + terminalZdotdir: z.boolean().optional(), diffEnabled: z.boolean().optional(), fuzzyMatchThreshold: z.number().optional(), @@ -592,6 +599,12 @@ const globalSettingsRecord: GlobalSettingsRecord = { terminalOutputLineLimit: undefined, terminalShellIntegrationTimeout: undefined, + terminalCommandDelay: undefined, + terminalPowershellCounter: undefined, + terminalZshClearEolMark: undefined, + terminalZshOhMy: undefined, + terminalZshP10k: undefined, + terminalZdotdir: undefined, diffEnabled: undefined, fuzzyMatchThreshold: undefined, @@ -731,6 +744,7 @@ export const clineSays = [ "new_task", "checkpoint_saved", "rooignore_error", + "diff_error", ] as const export const clineSaySchema = z.enum(clineSays) diff --git a/evals/pnpm-lock.yaml b/evals/pnpm-lock.yaml index b50e3a3492c..536ad19e3f2 100644 --- a/evals/pnpm-lock.yaml +++ b/evals/pnpm-lock.yaml @@ -62,6 +62,9 @@ importers: p-wait-for: specifier: ^5.0.2 version: 5.0.2 + ps-tree: + specifier: ^1.2.0 + version: 1.2.0 devDependencies: '@evals/eslint-config': specifier: workspace:^ @@ -69,6 +72,9 @@ importers: '@evals/typescript-config': specifier: workspace:^ version: link:../../config/typescript + '@types/ps-tree': + specifier: ^1.1.6 + version: 1.1.6 apps/web: dependencies: diff --git a/evals/scripts/setup.sh b/evals/scripts/setup.sh index ed66963542b..f58f80793e9 100755 --- a/evals/scripts/setup.sh +++ b/evals/scripts/setup.sh @@ -275,6 +275,25 @@ fi pnpm install --silent || exit 1 +if ! command -v code &>/dev/null; then + echo "⚠️ Visual Studio Code cli is not installed" + exit 1 +else + VSCODE_VERSION=$(code --version | head -n 1) + echo "✅ Visual Studio Code is installed ($VSCODE_VERSION)" +fi + +# To reset VSCode: +# rm -rvf ~/.vscode && rm -rvf ~/Library/Application\ Support/Code + +echo "🔌 Installing Visual Studio Code extensions..." +code --install-extension golang.go &>/dev/null || exit 1 +code --install-extension dbaeumer.vscode-eslint&>/dev/null || exit 1 +code --install-extension redhat.java &>/dev/null || exit 1 +code --install-extension ms-python.python&>/dev/null || exit 1 +code --install-extension rust-lang.rust-analyzer &>/dev/null || exit 1 +code --install-extension rooveterinaryinc.roo-cline &>/dev/null || exit 1 + if [[ ! -d "../../evals" ]]; then if gh auth status &>/dev/null; then read -p "🔗 Would you like to be able to share eval results? (Y/n): " fork_evals @@ -293,9 +312,9 @@ if [[ ! -s .env ]]; then cp .env.sample .env || exit 1 fi -echo "🗄️ Syncing database..." -pnpm --filter @evals/db db:push || exit 1 -pnpm --filter @evals/db db:enable-wal || exit 1 +echo "🗄️ Syncing Roo Code evals database..." +pnpm --filter @evals/db db:push &>/dev/null || exit 1 +pnpm --filter @evals/db db:enable-wal &>/dev/null || exit 1 if ! grep -q "OPENROUTER_API_KEY" .env; then read -p "🔐 Enter your OpenRouter API key (sk-or-v1-...): " openrouter_api_key @@ -304,14 +323,6 @@ if ! grep -q "OPENROUTER_API_KEY" .env; then echo "OPENROUTER_API_KEY=$openrouter_api_key" >> .env || exit 1 fi -if ! command -v code &>/dev/null; then - echo "⚠️ Visual Studio Code cli is not installed" - exit 1 -else - VSCODE_VERSION=$(code --version | head -n 1) - echo "✅ Visual Studio Code is installed ($VSCODE_VERSION)" -fi - if [[ ! -s "../bin/roo-code-latest.vsix" ]]; then build_extension else From b4ba4cb87197af0edee1597501418c4a6fd76355 Mon Sep 17 00:00:00 2001 From: cte Date: Sun, 13 Apr 2025 00:56:49 -0700 Subject: [PATCH 2/2] Remove debugging --- evals/apps/web/src/hooks/use-run-status.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/evals/apps/web/src/hooks/use-run-status.ts b/evals/apps/web/src/hooks/use-run-status.ts index 3278e69e7a2..1d463fc931d 100644 --- a/evals/apps/web/src/hooks/use-run-status.ts +++ b/evals/apps/web/src/hooks/use-run-status.ts @@ -56,7 +56,6 @@ export const useRunStatus = (run: Run) => { setTasksUpdatedAt(Date.now()) break case RooCodeEventName.TaskTokenUsageUpdated: { - console.log("taskTokenUsageUpdated", payload) const startTime = startTimes.current.get(taskId) const duration = startTime ? Date.now() - startTime : undefined tokenUsage.current.set(taskId, { ...payload[1], duration })