From cd581bc26746f92001b02d17e5cd5aaf9ad9baac Mon Sep 17 00:00:00 2001 From: cte Date: Thu, 5 Jun 2025 16:30:11 -0700 Subject: [PATCH] Fix rust evals (run cargo with bash) + logging tweaks --- packages/evals/src/cli/index.ts | 3 +- packages/evals/src/cli/processTask.ts | 2 +- packages/evals/src/cli/runEvals.ts | 26 ++--- packages/evals/src/cli/runTask.ts | 154 ++++++++++---------------- packages/evals/src/cli/runUnitTest.ts | 32 +++--- packages/evals/src/cli/utils.ts | 15 +++ packages/ipc/src/ipc-client.ts | 20 +++- 7 files changed, 124 insertions(+), 128 deletions(-) diff --git a/packages/evals/src/cli/index.ts b/packages/evals/src/cli/index.ts index b50c72dd14..7876687f3a 100644 --- a/packages/evals/src/cli/index.ts +++ b/packages/evals/src/cli/index.ts @@ -8,7 +8,7 @@ import { runEvals } from "./runEvals.js" import { processTask } from "./processTask.js" const main = async () => { - const result = await run( + await run( command({ name: "cli", description: "Execute an eval run.", @@ -43,7 +43,6 @@ const main = async () => { process.argv.slice(2), ) - console.log(result) process.exit(0) } diff --git a/packages/evals/src/cli/processTask.ts b/packages/evals/src/cli/processTask.ts index 7ccf512b59..50cb7c060e 100644 --- a/packages/evals/src/cli/processTask.ts +++ b/packages/evals/src/cli/processTask.ts @@ -25,7 +25,7 @@ export const processTask = async (taskId: number) => { await runTask({ run, task, publish }) console.log(`[${Date.now()} | ${tag}] testing task ${task.id} (${task.language}/${task.exercise})...`) - const passed = await runUnitTest({ task }) + const passed = await runUnitTest({ run, task }) console.log(`[${Date.now()} | ${tag}] task ${task.id} (${task.language}/${task.exercise}) -> ${passed}`) await updateTask(task.id, { passed }) diff --git a/packages/evals/src/cli/runEvals.ts b/packages/evals/src/cli/runEvals.ts index aaeaf036da..9c4d9df292 100644 --- a/packages/evals/src/cli/runEvals.ts +++ b/packages/evals/src/cli/runEvals.ts @@ -1,10 +1,9 @@ -import { execa } from "execa" import PQueue from "p-queue" import { findRun, finishRun, getTasks } from "../db/index.js" import { exercisesPath } from "../exercises/index.js" -import { getTag, isDockerContainer } from "./utils.js" +import { getTag, isDockerContainer, resetEvalsRepo, commitEvalsRepoChanges } from "./utils.js" import { processTask, processTaskInContainer } from "./processTask.js" import { startHeartbeat, stopHeartbeat } from "./redis.js" @@ -24,31 +23,32 @@ export const runEvals = async (runId: number) => { const tag = getTag("runEvals", { run }) console.log(`[${Date.now()} | ${tag}] running ${tasks.length} task(s)`) - const cwd = exercisesPath - await execa({ cwd })`git config user.name "Roo Code"` - await execa({ cwd })`git config user.email "support@roocode.com"` - await execa({ cwd })`git checkout -f` - await execa({ cwd })`git clean -fd` - await execa({ cwd })`git checkout -b runs/${run.id}-${crypto.randomUUID().slice(0, 8)} main` + const containerized = isDockerContainer() + + if (!containerized) { + await resetEvalsRepo({ run, cwd: exercisesPath }) + } const heartbeat = await startHeartbeat(run.id) const queue = new PQueue({ concurrency: run.concurrency }) try { - const containerize = isDockerContainer() - await queue.addAll( tasks .filter((task) => task.finishedAt === null) - .map((task) => () => (containerize ? processTaskInContainer(task.id) : processTask(task.id))), + .map((task) => () => (containerized ? processTaskInContainer(task.id) : processTask(task.id))), ) console.log(`[${Date.now()} | ${tag}] finishRun`) const result = await finishRun(run.id) console.log(`[${Date.now()} | ${tag}] result ->`, result) - await execa({ cwd: exercisesPath })`git add .` - await execa({ cwd: exercisesPath })`git commit -m ${`Run #${run.id}`} --no-verify` + // There's no need to commit the changes in the container since they + // will lost when the container is destroyed. I think we should + // store the diffs in the database instead. + if (!containerized) { + await commitEvalsRepoChanges({ run, cwd: exercisesPath }) + } } finally { console.log(`[${Date.now()} | ${tag}] cleaning up`) stopHeartbeat(run.id, heartbeat) diff --git a/packages/evals/src/cli/runTask.ts b/packages/evals/src/cli/runTask.ts index fd5729704b..d27f4bac09 100644 --- a/packages/evals/src/cli/runTask.ts +++ b/packages/evals/src/cli/runTask.ts @@ -6,11 +6,10 @@ import pWaitFor from "p-wait-for" import { execa } from "execa" import { + type TaskEvent, + TaskCommandName, RooCodeEventName, - IpcOrigin, IpcMessageType, - TaskCommandName, - type TaskEvent, EVALS_SETTINGS, EVALS_TIMEOUT, } from "@roo-code/types" @@ -27,49 +26,38 @@ type RunTaskOptions = { publish: (taskEvent: TaskEvent) => Promise } -export const runTask = async ({ run, task, publish }: RunTaskOptions): Promise<{ success: boolean }> => { - const { language, exercise } = task +export const runTask = async ({ run, task, publish }: RunTaskOptions) => { const tag = getTag("runTask", { run, task }) + const log = (message: string, ...args: unknown[]) => console.log(`[${Date.now()} | ${tag}] ${message}`, ...args) + const logError = (message: string, ...args: unknown[]) => + console.error(`[${Date.now()} | ${tag}] ${message}`, ...args) + const { language, exercise } = task const prompt = fs.readFileSync(path.resolve(exercisesPath, `prompts/${language}.md`), "utf-8") const workspacePath = path.resolve(exercisesPath, language, exercise) - const taskSocketPath = path.resolve(os.tmpdir(), `evals-${run.id}-${task.id}.sock`) - - // Inject foot gun system prompt if present. - if (process.env.FOOTGUN_SYSTEM_PROMPT) { - const rooDir = path.join(workspacePath, ".roo") - - if (!fs.existsSync(rooDir)) { - fs.mkdirSync(rooDir, { recursive: true }) - } - - fs.writeFileSync(path.join(rooDir, "system-prompt-code"), process.env.FOOTGUN_SYSTEM_PROMPT) - } - - console.log(`[${Date.now()} | ${tag}] Opening new VS Code window at ${workspacePath}`) - + const ipcSocketPath = path.resolve(os.tmpdir(), `evals-${run.id}-${task.id}.sock`) + const env = { ROO_CODE_IPC_SOCKET_PATH: ipcSocketPath } const controller = new AbortController() const cancelSignal = controller.signal + const containerized = isDockerContainer() - const codeCommand = isDockerContainer() - ? `xvfb-run --auto-servernum --server-num=1 code --wait --log trace --disable-workspace-trust --disable-gpu --disable-lcd-text --no-sandbox --user-data-dir /roo/.vscode --password-store="basic"` - : `code --disable-workspace-trust` + const codeCommand = containerized + ? `xvfb-run --auto-servernum --server-num=1 code --wait --log trace --disable-workspace-trust --disable-gpu --disable-lcd-text --no-sandbox --user-data-dir /roo/.vscode --password-store="basic" -n ${workspacePath}` + : `code --disable-workspace-trust -n ${workspacePath}` - console.log(`[${Date.now()} | ${tag}] ${codeCommand}`) + log(codeCommand) - // Sleep for a random amount of time between 5 and 10 seconds. - await new Promise((resolve) => setTimeout(resolve, Math.random() * 5_000 + 5_000)) + // Sleep for a random amount of time between 5 and 10 seconds, unless we're + // running in a container, in which case there are no issues with flooding + // VSCode with new windows. + if (!containerized) { + await new Promise((resolve) => setTimeout(resolve, Math.random() * 5_000 + 5_000)) + } - const subprocess = execa({ - env: { - ROO_CODE_IPC_SOCKET_PATH: taskSocketPath, - }, - shell: "/bin/bash", - cancelSignal, - })`${codeCommand} -n ${workspacePath}` + const subprocess = execa({ env, shell: "/bin/bash", cancelSignal })`${codeCommand}` - // If debugging: - subprocess.stdout.pipe(process.stdout) + // If debugging, add `--verbose` to `command` and uncomment the following line. + // subprocess.stdout.pipe(process.stdout) // Give VSCode some time to spawn before connecting to its unix socket. await new Promise((resolve) => setTimeout(resolve, 3_000)) @@ -78,26 +66,20 @@ export const runTask = async ({ run, task, publish }: RunTaskOptions): Promise<{ while (true) { try { - console.log(`[${Date.now()} | ${tag}] connecting to ${taskSocketPath}`) - client = new IpcClient(taskSocketPath) + client = new IpcClient(ipcSocketPath) await pWaitFor(() => client!.isReady, { interval: 250, timeout: 1_000 }) break } catch (_error) { - if (client) { - client.disconnect() - } - + client?.disconnect() attempts-- if (attempts <= 0) { - console.error(`[${Date.now()} | ${tag}] unable to connect`) - return { success: false } + logError(`unable to connect to IPC socket -> ${ipcSocketPath}`) + return } } } - console.log(`[${Date.now()} | ${tag}] connected to ${taskSocketPath}`) - let taskStartedAt = Date.now() let taskFinishedAt: number | undefined let taskMetricsId: number | undefined @@ -106,18 +88,24 @@ export const runTask = async ({ run, task, publish }: RunTaskOptions): Promise<{ const ignoreEvents: Record<"broadcast" | "log", RooCodeEventName[]> = { broadcast: [RooCodeEventName.Message], - log: [RooCodeEventName.TaskTokenUsageUpdated], // [RooCodeEventName.Message, RooCodeEventName.TaskAskResponded], + log: [RooCodeEventName.TaskTokenUsageUpdated, RooCodeEventName.TaskAskResponded], } client.on(IpcMessageType.TaskEvent, async (taskEvent) => { const { eventName, payload } = taskEvent + // Publish all events except for these to Redis. if (!ignoreEvents.broadcast.includes(eventName)) { await publish({ ...taskEvent, taskId: task.id }) } - if (!ignoreEvents.log.includes(eventName)) { - console.log(`[${Date.now()} | ${tag}] ${eventName} ->`, payload) + // Log all events except for these. + // For message events we only log non-partial messages. + if ( + !ignoreEvents.log.includes(eventName) && + (eventName !== RooCodeEventName.Message || payload[0].message.partial !== true) + ) { + log(`${eventName} ->`, payload) } if (eventName === RooCodeEventName.TaskStarted) { @@ -177,54 +165,32 @@ export const runTask = async ({ run, task, publish }: RunTaskOptions): Promise<{ }) client.on(IpcMessageType.Disconnect, async () => { - console.log(`[${Date.now()} | ${tag}] disconnect`) + log(`disconnected from IPC socket -> ${ipcSocketPath}`) isClientDisconnected = true }) - if (client.isReady) { - const configuration = { - ...EVALS_SETTINGS, - ...run.settings, - openRouterApiKey: process.env.OPENROUTER_API_KEY, - } - - client.sendMessage({ - type: IpcMessageType.TaskCommand, - origin: IpcOrigin.Client, - clientId: client.clientId!, - data: { - commandName: TaskCommandName.StartNewTask, - data: { - configuration, - text: prompt, - newTab: true, - }, + client.sendCommand({ + commandName: TaskCommandName.StartNewTask, + data: { + configuration: { + ...EVALS_SETTINGS, + ...run.settings, + openRouterApiKey: process.env.OPENROUTER_API_KEY, }, - }) - } else { - console.error(`[${Date.now()} | ${tag}] unable to connect`) - client.disconnect() - taskFinishedAt = Date.now() - isClientDisconnected = true - } + text: prompt, + newTab: true, + }, + }) try { await pWaitFor(() => !!taskFinishedAt || isClientDisconnected, { interval: 1_000, timeout: EVALS_TIMEOUT }) - // eslint-disable-next-line @typescript-eslint/no-unused-vars - } catch (error) { - console.log(`[${Date.now()} | ${tag}] time limit reached`) + } catch (_error) { + logError("time limit reached") - // Cancel the task. if (rooTaskId && !isClientDisconnected) { - client.sendMessage({ - type: IpcMessageType.TaskCommand, - origin: IpcOrigin.Client, - clientId: client.clientId!, - data: { commandName: TaskCommandName.CancelTask, data: rooTaskId }, - }) - - // Allow some time for the task to cancel. - await new Promise((resolve) => setTimeout(resolve, 5_000)) + log("cancelling task") + client.sendCommand({ commandName: TaskCommandName.CancelTask, data: rooTaskId }) + await new Promise((resolve) => setTimeout(resolve, 5_000)) // Allow some time for the task to cancel. } await updateTask(task.id, { finishedAt: new Date() }) @@ -232,15 +198,9 @@ export const runTask = async ({ run, task, publish }: RunTaskOptions): Promise<{ if (!isClientDisconnected) { if (rooTaskId) { - client.sendMessage({ - type: IpcMessageType.TaskCommand, - origin: IpcOrigin.Client, - clientId: client.clientId!, - data: { commandName: TaskCommandName.CloseTask, data: rooTaskId }, - }) - - // Allow some time for the window to close. - await new Promise((resolve) => setTimeout(resolve, 2_000)) + log("closing task") + client.sendCommand({ commandName: TaskCommandName.CloseTask, data: rooTaskId }) + await new Promise((resolve) => setTimeout(resolve, 2_000)) // Allow some time for the window to close. } client.disconnect() @@ -248,6 +208,4 @@ export const runTask = async ({ run, task, publish }: RunTaskOptions): Promise<{ controller.abort() await subprocess - - return { success: !!taskFinishedAt } } diff --git a/packages/evals/src/cli/runUnitTest.ts b/packages/evals/src/cli/runUnitTest.ts index c109895939..7785312e76 100644 --- a/packages/evals/src/cli/runUnitTest.ts +++ b/packages/evals/src/cli/runUnitTest.ts @@ -3,9 +3,11 @@ import * as path from "path" import { execa, parseCommandString } from "execa" import psTree from "ps-tree" -import { type Task } from "../db/index.js" +import type { Run, Task } from "../db/index.js" import { type ExerciseLanguage, exercisesPath } from "../exercises/index.js" +import { getTag } from "./utils.js" + const UNIT_TEST_TIMEOUT = 2 * 60 * 1_000 const testCommands: Record = { @@ -16,18 +18,25 @@ const testCommands: Record { +export const runUnitTest = async ({ run, task }: { run: Run; task: Task }) => { + const tag = getTag("runUnitTest", { run, task }) + const log = (message: string, ...args: unknown[]) => console.log(`[${Date.now()} | ${tag}] ${message}`, ...args) + const logError = (message: string, ...args: unknown[]) => + console.error(`[${Date.now()} | ${tag}] ${message}`, ...args) + const cmd = testCommands[task.language] const exercisePath = path.resolve(exercisesPath, task.language, task.exercise) const cwd = cmd.cwd ? path.resolve(exercisePath, cmd.cwd) : exercisePath const commands = cmd.commands.map((cs) => parseCommandString(cs)) - const tag = `cli#runUnitTest | ${task.language} / ${task.exercise}` let passed = true for (const command of commands) { try { - const subprocess = execa({ cwd, shell: true, reject: false })`${command}` + log(`running "${command.join(" ")}"`) + const subprocess = execa({ cwd, shell: "/bin/bash", reject: false })`${command}` + subprocess.stdout.pipe(process.stdout) + subprocess.stderr.pipe(process.stderr) const timeout = setTimeout(async () => { const descendants = await new Promise((resolve, reject) => { @@ -40,28 +49,25 @@ export const runUnitTest = async ({ task }: { task: Task }) => { }) }) - console.log( - `${Date.now()} [${tag}] "${command.join(" ")}": unit tests timed out, killing ${subprocess.pid} + ${JSON.stringify(descendants)}`, - ) + log(`"${command.join(" ")}" timed out, killing ${subprocess.pid} + ${JSON.stringify(descendants)}`) if (descendants.length > 0) { for (const descendant of descendants) { try { - console.log(`${Date.now()} [${tag}] killing ${descendant}`) - + log(`killing descendant process ${descendant}`) await execa`kill -9 ${descendant}` } catch (error) { - console.error(`${Date.now()} [${tag}] Error killing descendant processes:`, error) + logError(`failed to kill descendant process ${descendant}:`, error) } } } - console.log(`${Date.now()} [${tag}] killing ${subprocess.pid}`) + log(`killing main process ${subprocess.pid}`) try { await execa`kill -9 ${subprocess.pid!}` } catch (error) { - console.error(`${Date.now()} [${tag}] Error killing process:`, error) + logError(`failed to kill main process ${subprocess.pid}:`, error) } }, UNIT_TEST_TIMEOUT) @@ -74,7 +80,7 @@ export const runUnitTest = async ({ task }: { task: Task }) => { break } } catch (error) { - console.error(`${Date.now()} [${tag}]`, error) + logError(`unexpected error:`, error) passed = false break } diff --git a/packages/evals/src/cli/utils.ts b/packages/evals/src/cli/utils.ts index ed560896a1..cbabb451b9 100644 --- a/packages/evals/src/cli/utils.ts +++ b/packages/evals/src/cli/utils.ts @@ -1,5 +1,7 @@ import * as fs from "fs" +import { execa } from "execa" + import type { Run, Task } from "../db/index.js" export const getTag = (caller: string, { run, task }: { run: Run; task?: Task }) => @@ -14,3 +16,16 @@ export const isDockerContainer = () => { return false } } + +export const resetEvalsRepo = async ({ run, cwd }: { run: Run; cwd: string }) => { + await execa({ cwd })`git config user.name "Roo Code"` + await execa({ cwd })`git config user.email "support@roocode.com"` + await execa({ cwd })`git checkout -f` + await execa({ cwd })`git clean -fd` + await execa({ cwd })`git checkout -b runs/${run.id}-${crypto.randomUUID().slice(0, 8)} main` +} + +export const commitEvalsRepoChanges = async ({ run, cwd }: { run: Run; cwd: string }) => { + await execa({ cwd })`git add .` + await execa({ cwd })`git commit -m ${`Run #${run.id}`} --no-verify` +} diff --git a/packages/ipc/src/ipc-client.ts b/packages/ipc/src/ipc-client.ts index 9ccf0c02aa..0d3a767e00 100644 --- a/packages/ipc/src/ipc-client.ts +++ b/packages/ipc/src/ipc-client.ts @@ -3,7 +3,14 @@ import * as crypto from "node:crypto" import ipc from "node-ipc" -import { type IpcClientEvents, type IpcMessage, IpcOrigin, IpcMessageType, ipcMessageSchema } from "@roo-code/types" +import { + type TaskCommand, + type IpcClientEvents, + type IpcMessage, + IpcOrigin, + IpcMessageType, + ipcMessageSchema, +} from "@roo-code/types" export class IpcClient extends EventEmitter { private readonly _socketPath: string @@ -80,6 +87,17 @@ export class IpcClient extends EventEmitter { this._log(...args) } + public sendCommand(command: TaskCommand) { + const message: IpcMessage = { + type: IpcMessageType.TaskCommand, + origin: IpcOrigin.Client, + clientId: this._clientId!, + data: command, + } + + this.sendMessage(message) + } + public sendMessage(message: IpcMessage) { ipc.of[this._id]?.emit("message", message) }