diff --git a/.dockerignore b/.dockerignore index 8c31673920..b83f169368 100644 --- a/.dockerignore +++ b/.dockerignore @@ -7,6 +7,7 @@ # Build artifacts bin/ +!bin/roo-code-latest.vsix dist/ **/dist/ out/ diff --git a/benchmark/Dockerfile b/benchmark/Dockerfile index 72ffc4fb40..a2b69d00c9 100644 --- a/benchmark/Dockerfile +++ b/benchmark/Dockerfile @@ -1,8 +1,3 @@ -# docker build -f Dockerfile.base -t roo-code-benchmark-base .. -# docker build -f Dockerfile -t roo-code-benchmark .. -# docker run -d -it -p 3000:3000 -v /tmp/benchmarks.db:/tmp/benchmarks.db roo-code-benchmark -# docker exec -it $(docker ps --filter "ancestor=roo-code-benchmark" -q) /bin/bash - FROM node:20-slim AS base ENV PNPM_HOME="/pnpm" ENV PATH="$PNPM_HOME:$PATH" @@ -49,10 +44,7 @@ RUN echo 'source $HOME/.cargo/env' >> $HOME/.bashrc WORKDIR /home/vscode USER vscode -COPY benchmark/entrypoint.sh /usr/local/bin/entrypoint.sh - # Copy exercises -WORKDIR /home/vscode RUN git clone https://github.com/cte/Roo-Code-Benchmark.git exercises # Prepare exercises @@ -60,23 +52,35 @@ WORKDIR /home/vscode/exercises/python RUN curl -LsSf https://astral.sh/uv/install.sh | sh RUN /home/vscode/.local/bin/uv sync -# Copy and build extension dependencies -WORKDIR /home/vscode/repo -COPY --chown=vscode:vscode package*.json /home/vscode/repo/ -COPY --chown=vscode:vscode webview-ui/package*.json /home/vscode/repo/webview-ui/ -RUN npm-run-all -l -p install-extension install-webview - -# Copy extension and benchmark code -COPY --chown=vscode:vscode . 
/home/vscode/repo - -# Build extension dependencies WORKDIR /home/vscode/repo/benchmark + +# Install dependencies +COPY --chown=vscode:vscode ./benchmark/package.json ./benchmark/pnpm-lock.yaml ./benchmark/pnpm-workspace.yaml ./benchmark/.npmrc ./ +RUN mkdir -p apps/cli apps/web \ + config/eslint config/typescript \ + packages/db packages/ipc packages/lib packages/types +COPY --chown=vscode:vscode ./benchmark/apps/cli/package.json ./apps/cli/ +COPY --chown=vscode:vscode ./benchmark/apps/web/package.json ./apps/web/ +COPY --chown=vscode:vscode ./benchmark/config/eslint/package.json ./config/eslint/ +COPY --chown=vscode:vscode ./benchmark/config/typescript/package.json ./config/typescript/ +COPY --chown=vscode:vscode ./benchmark/packages/db/package.json ./packages/db/ +COPY --chown=vscode:vscode ./benchmark/packages/ipc/package.json ./packages/ipc/ +COPY --chown=vscode:vscode ./benchmark/packages/lib/package.json ./packages/lib/ +COPY --chown=vscode:vscode ./benchmark/packages/types/package.json ./packages/types/ RUN pnpm install -# Initialize database -RUN echo "BENCHMARKS_DB_PATH=file:/tmp/benchmarks.db" > .env +# Copy & install extension +COPY --chown=vscode:vscode ./bin/roo-code-latest.vsix ./ +RUN code --debug --install-extension ./roo-code-latest.vsix + +# Copy application code +COPY --chown=vscode:vscode ./benchmark ./ + +# Copy environment variables +COPY --chown=vscode:vscode ./benchmark/.env ./ + +# Push database schema RUN pnpm --filter @benchmark/db db:push EXPOSE 3000 -ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] -CMD ["/usr/bin/pnpm", "dev"] +CMD ["pnpm", "web"] diff --git a/benchmark/README.md b/benchmark/README.md index ae69c754dd..513efaa9c1 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -2,16 +2,12 @@ ## Get Started -[Install](https://docs.docker.com/desktop/) and run Docker Desktop. - -Build a container to run the Roo Code benchmarks: +Clone the Roo Code repo: ```sh -docker build -f Dockerfile -t roo-code-benchmark .. 
+git clone https://github.com/RooVetGit/Roo-Code.git ``` -## Local Debugging - Install nvm: ```sh @@ -28,28 +24,22 @@ corepack enable pnpm corepack use pnpm@latest-10 ``` -Install dependencies: +Build the Roo Code extension: ```sh -pnpm install +npm run install:all +npx vsce package --out bin/roo-code-latest.vsix +code --install-extension bin/roo-code-latest.vsix ``` -Configure database: - -```sh -cp packages/server/.env.sample packages/server/.env -# Update BENCHMARKS_DB_PATH as needed in `packages/server/.env`. -pnpm --filter @benchmark/db db:push -``` +[Install](https://docs.docker.com/desktop/) and run Docker Desktop. -Run the web app: +Build a container to run the Roo Code evals: ```sh -pnpm web +cd benchmark +pnpm install +pnpm docker:start ``` -Run an exercise via the cli: - -```sh -pnpm cli run [cpp|go|java|javascript|python|rust|all] [|all] -``` +Navigate to [localhost:3000](http://localhost:3000/) in your browser. diff --git a/benchmark/apps/cli/src/index.ts b/benchmark/apps/cli/src/index.ts index a2fab1ab3f..6aab914c95 100644 --- a/benchmark/apps/cli/src/index.ts +++ b/benchmark/apps/cli/src/index.ts @@ -33,6 +33,9 @@ import { IpcServer, IpcClient } from "@benchmark/ipc" import { __dirname, extensionDevelopmentPath, exercisesPath } from "./paths.js" import { getExercises } from "./exercises.js" +const maxConcurrency = 2 +const taskTimeLimit = 5 * 60 * 1_000 + const testCommands: Record = { cpp: { commands: ["cmake -G 'Unix\\ Makefiles' -DEXERCISM_RUN_ALL_TESTS=1 ..", "make"], cwd: "build" }, // timeout 15s bash -c "cd '$dir' && mkdir -p build && cd build && cmake -G 'Unix Makefiles' -DEXERCISM_RUN_ALL_TESTS=1 .. 
>/dev/null 2>&1 && make >/dev/null 2>&1" go: { commands: ["go test"] }, // timeout 15s bash -c "cd '$dir' && go test > /dev/null 2>&1" @@ -42,8 +45,6 @@ const testCommands: Record /dev/null 2>&1" } -let parentPid: number | undefined = undefined - const run = async (toolbox: GluegunToolbox) => { const { config, prompt } = toolbox @@ -93,6 +94,8 @@ const run = async (toolbox: GluegunToolbox) => { throw new Error("No tasks found.") } + console.log(await execa({ cwd: exercisesPath })`git config user.name "Roo Code"`) + console.log(await execa({ cwd: exercisesPath })`git config user.email "support@roocode.com"`) console.log(await execa({ cwd: exercisesPath })`git checkout -f`) console.log(await execa({ cwd: exercisesPath })`git clean -fd`) console.log(await execa({ cwd: exercisesPath })`git checkout -b runs/${run.id} main`) @@ -113,7 +116,6 @@ const run = async (toolbox: GluegunToolbox) => { // }) // }) - const maxConcurrency = 3 const runningPromises: Promise[] = [] const processTask = async (task: Task) => { @@ -147,10 +149,11 @@ const run = async (toolbox: GluegunToolbox) => { await Promise.all(runningPromises) const result = await finishRun(run.id) - console.log("[cli#run]", result) - - if (parentPid) { - console.log(await execa`kill -INT ${parentPid}`) + try { + console.log("[cli#run]", result) + // eslint-disable-next-line @typescript-eslint/no-unused-vars + } catch (error) { + // console.error(error) } console.log(await execa({ cwd: exercisesPath })`git add .`) @@ -163,18 +166,38 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server const dirname = path.dirname(run.socketPath) const taskSocketPath = path.resolve(dirname, `${dirname}/task-${task.id}.sock`) - await execa({ - env: { ROO_CODE_IPC_SOCKET_PATH: taskSocketPath }, - })`code -n ${path.resolve(exercisesPath, language, exercise)}` + const controller = new AbortController() + const cancelSignal = controller.signal + + // If debugging: + // Use --wait --log trace or --verbose. 
+ let codeCommand = `code --disable-workspace-trust` + const isDocker = fs.existsSync("/.dockerenv") - console.log(`Connecting to ${taskSocketPath}`) + if (isDocker) { + codeCommand = `xvfb-run --auto-servernum --server-num=1 ${codeCommand} --wait --log trace --disable-gpu --password-store="basic"` + } + + const subprocess = execa({ + env: { + ROO_CODE_IPC_SOCKET_PATH: taskSocketPath, + }, + shell: "/bin/bash", + cancelSignal, + })`${codeCommand} -n ${path.resolve(exercisesPath, language, exercise)}` + + // If debugging: + // subprocess.stdout.pipe(process.stdout) + + // Give VSCode some time to spawn before connecting to its unix socket. + await new Promise((resolve) => setTimeout(resolve, isDocker ? 5_000 : 1_000)) + console.log(`Connecting to ${taskSocketPath} (pid: ${subprocess.pid})`) const createClient = (taskSocketPath: string) => { const ipcClient = new IpcClient(taskSocketPath) ipcClient.on(IpcMessageType.Ack, (ack) => { console.log(`[cli#runExercise | ${language} / ${exercise}] ack`, ack) - parentPid = ack.ppid }) return ipcClient @@ -185,7 +208,7 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server while (++tries < 5) { try { - await pWaitFor(() => client.isReady, { interval: 100, timeout: 2_000 }) + await pWaitFor(() => client.isReady, { interval: 100, timeout: 5_000 }) break } catch (error) { console.error(error) @@ -194,24 +217,19 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server } } - if (!client.isReady) { - client.disconnect() - console.log(`[cli#runExercise | ${language} / ${exercise}] unable to connect`) - return - } - let isTaskFinished = false + let isClientDisconnected = false client.on(IpcMessageType.Disconnect, async () => { console.log(`[cli#runExercise | ${language} / ${exercise}] disconnect`) - // await updateTask(task.id, { finishedAt: new Date() }) isTaskFinished = true + isClientDisconnected = true }) - const ignoreEvents = [ + const ignoreEvents: RooCodeEventName[] = [ 
RooCodeEventName.Message, - RooCodeEventName.TaskTokenUsageUpdated, - RooCodeEventName.TaskAskResponded, + // RooCodeEventName.TaskTokenUsageUpdated, + // RooCodeEventName.TaskAskResponded, ] let taskStartedAt = Date.now() @@ -230,6 +248,7 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server if (!ignoreEvents.includes(eventName)) { console.log(`[cli#runExercise | ${language} / ${exercise}] taskEvent -> ${eventName}`) + // console.log(payload) } if (eventName === RooCodeEventName.TaskStarted) { @@ -278,33 +297,40 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server } }) - client.sendMessage({ - type: IpcMessageType.TaskCommand, - origin: IpcOrigin.Client, - clientId: client.clientId!, - data: { - commandName: TaskCommandName.StartNewTask, + if (client.isReady) { + client.sendMessage({ + type: IpcMessageType.TaskCommand, + origin: IpcOrigin.Client, + clientId: client.clientId!, data: { - configuration: { - ...rooCodeDefaults, - openRouterApiKey: process.env.OPENROUTER_API_KEY!, - ...run.settings, + commandName: TaskCommandName.StartNewTask, + data: { + configuration: { + ...rooCodeDefaults, + openRouterApiKey: process.env.OPENROUTER_API_KEY!, + ...run.settings, + }, + text: prompt, + newTab: true, }, - text: prompt, - newTab: true, }, - }, - }) + }) - console.log(`[cli#runExercise | ${language} / ${exercise}] starting task`) + console.log(`[cli#runExercise | ${language} / ${exercise}] starting task`) + } else { + console.log(`[cli#runExercise | ${language} / ${exercise}] unable to connect`) + client.disconnect() + isTaskFinished = true + isClientDisconnected = true + } try { - await pWaitFor(() => isTaskFinished, { interval: 1_000, timeout: 1 * 60 * 1_000 }) + await pWaitFor(() => isTaskFinished, { interval: 1_000, timeout: taskTimeLimit }) // eslint-disable-next-line @typescript-eslint/no-unused-vars } catch (error) { console.log(`[cli#runExercise | ${language} / ${exercise}] time limit reached`) - if 
(rooTaskId) { + if (rooTaskId && !isClientDisconnected) { client.sendMessage({ type: IpcMessageType.TaskCommand, origin: IpcOrigin.Client, @@ -318,24 +344,35 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server await updateTask(task.id, { finishedAt: new Date() }) } - try { - client.sendMessage({ - type: IpcMessageType.VSCodeCommand, - origin: IpcOrigin.Client, - clientId: client.clientId!, - data: "workbench.action.files.saveFiles", - }) + if (!isClientDisconnected) { + try { + client.sendMessage({ + type: IpcMessageType.VSCodeCommand, + origin: IpcOrigin.Client, + clientId: client.clientId!, + data: "workbench.action.files.saveFiles", + }) - client.sendMessage({ - type: IpcMessageType.VSCodeCommand, - origin: IpcOrigin.Client, - clientId: client.clientId!, - data: "workbench.action.closeWindow", - }) + client.sendMessage({ + type: IpcMessageType.VSCodeCommand, + origin: IpcOrigin.Client, + clientId: client.clientId!, + data: "workbench.action.closeWindow", + }) - client.disconnect() + client.disconnect() + } catch (error) { + console.error(error) + } + } + + try { + console.log(`[cli#runExercise | ${language} / ${exercise}] aborting subprocess`) + controller.abort() + await subprocess + // eslint-disable-next-line @typescript-eslint/no-unused-vars } catch (error) { - console.error(error) + // console.error(error) } } diff --git a/benchmark/apps/web/src/lib/server/runs.ts b/benchmark/apps/web/src/lib/server/runs.ts index 68ff4f49ff..19d3d41f8f 100644 --- a/benchmark/apps/web/src/lib/server/runs.ts +++ b/benchmark/apps/web/src/lib/server/runs.ts @@ -3,6 +3,7 @@ import { spawn } from "child_process" import path from "path" import os from "os" +import fs from "fs" import { revalidatePath } from "next/cache" import pMap from "p-map" @@ -42,12 +43,14 @@ export async function createRun({ suite, exercises = [], ...values }: CreateRun) revalidatePath("/runs") try { + const logFile = fs.openSync(`/tmp/roo-code-evals-${run.id}.log`, "a") + 
const process = spawn( "pnpm", ["--filter", "@benchmark/cli", "dev", "run", "all", "--runId", run.id.toString()], { detached: true, - stdio: "ignore", + stdio: ["ignore", logFile, logFile], }, ) diff --git a/benchmark/entrypoint.sh b/benchmark/entrypoint.sh deleted file mode 100755 index ab24ab6bff..0000000000 --- a/benchmark/entrypoint.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -npx drizzle-kit push -exec "$@" diff --git a/benchmark/package.json b/benchmark/package.json index 52aa20a16a..b7d145e174 100644 --- a/benchmark/package.json +++ b/benchmark/package.json @@ -11,14 +11,13 @@ "web": "turbo dev --filter @benchmark/web --output-logs new-only --ui tui", "cli": "turbo dev --filter @benchmark/cli --output-logs new-only --ui tui -- run", "drizzle:studio": "pnpm --filter @benchmark/db db:studio", - "docker:build": "docker build -f Dockerfile -t roo-code-benchmark ..", + "docker:build": "docker build -f Dockerfile -t roo-code-benchmark --progress=plain ..", "docker:run": "touch /tmp/benchmarks.db && docker run -d -it -p 3000:3000 -v /tmp/benchmarks.db:/tmp/benchmarks.db roo-code-benchmark", - "docker:start": "npm run docker:build && npm run docker:run", + "docker:start": "pnpm docker:build && pnpm docker:run", "docker:shell": "docker exec -it $(docker ps --filter \"ancestor=roo-code-benchmark\" -q) /bin/bash", - "docker:cli": "docker exec -it -w /home/vscode/repo/benchmark $(docker ps --filter \"ancestor=roo-code-benchmark\" -q) xvfb-run npm run cli --", "docker:stop": "docker stop $(docker ps --filter \"ancestor=roo-code-benchmark\" -q)", "docker:rm": "docker rm $(docker ps -a --filter \"ancestor=roo-code-benchmark\" -q)", - "docker:clean": "npm run docker:stop && npm run docker:rm" + "docker:clean": "pnpm docker:stop && pnpm docker:rm" }, "devDependencies": { "@dotenvx/dotenvx": "^1.39.0", diff --git a/benchmark/packages/types/src/roo-code-defaults.ts b/benchmark/packages/types/src/roo-code-defaults.ts index c22233869f..855b0ca231 100644 --- 
a/benchmark/packages/types/src/roo-code-defaults.ts +++ b/benchmark/packages/types/src/roo-code-defaults.ts @@ -41,7 +41,7 @@ export const rooCodeDefaults: RooCodeSettings = { maxReadFileLine: 500, terminalOutputLineLimit: 500, - terminalShellIntegrationTimeout: 5000, + terminalShellIntegrationTimeout: 15000, rateLimitSeconds: 0, diffEnabled: true, diff --git a/src/exports/api.ts b/src/exports/api.ts index be24483df3..61dd34cc63 100644 --- a/src/exports/api.ts +++ b/src/exports/api.ts @@ -4,7 +4,7 @@ import * as vscode from "vscode" import { ClineProvider } from "../core/webview/ClineProvider" import { openClineInNewTab } from "../activate/registerCommands" -import { RooCodeSettings, RooCodeEvents, RooCodeEventName } from "../schemas" +import { RooCodeSettings, RooCodeEvents, RooCodeEventName, ClineMessage } from "../schemas" import { IpcOrigin, IpcMessageType, TaskCommandName, TaskEvent } from "../schemas/ipc" import { RooCodeAPI } from "./interface" import { IpcServer } from "./ipc" @@ -28,28 +28,75 @@ export class API extends EventEmitter implements RooCodeAPI { this.registerListeners(this.sidebarProvider) if (socketPath) { - this.ipc = new IpcServer(socketPath, (...args: unknown[]) => outputChannelLog(this.outputChannel, ...args)) - this.ipc.listen() + const ipc = (this.ipc = new IpcServer(socketPath, (...args: unknown[]) => + outputChannelLog(this.outputChannel, ...args), + )) - this.outputChannel.appendLine( - `[API] ipc server started: socketPath=${socketPath}, pid=${process.pid}, ppid=${process.ppid}`, - ) - - this.ipc.on(IpcMessageType.TaskCommand, async (_clientId, { commandName, data }) => { - this.outputChannel.appendLine(`[API] TaskCommand -> ${commandName}`) + ipc.listen() + this.log(`[API] ipc server started: socketPath=${socketPath}, pid=${process.pid}, ppid=${process.ppid}`) + ipc.on(IpcMessageType.TaskCommand, async (_clientId, { commandName, data }) => { switch (commandName) { case TaskCommandName.StartNewTask: - await this.startNewTask(data) + 
this.log(`[API] StartNewTask -> ${data.text}`) + this.log(`[API] StartNewTask -> ${JSON.stringify(data.configuration)}`) + + try { + await this.startNewTask(data) + + ipc.broadcast({ + type: IpcMessageType.TaskEvent, + origin: IpcOrigin.Server, + data: { + eventName: RooCodeEventName.Message, + payload: [ + { + taskId: "[system]", + action: "created", + message: { + ts: Date.now(), + type: "say", + text: `ACK: TaskCommand -> ${commandName}`, + }, + }, + ], + }, + }) + } catch (error) { + this.log(`[API] error starting new task: ${error}`) + } + break case TaskCommandName.CancelTask: + this.log(`[API] CancelTask -> ${data}`) + await this.cancelTask(data) + + ipc.broadcast({ + type: IpcMessageType.TaskEvent, + origin: IpcOrigin.Server, + data: { + eventName: RooCodeEventName.Message, + payload: [ + { + taskId: "[system]", + action: "created", + message: { + ts: Date.now(), + type: "say", + text: `ACK: CancelTask -> ${data}`, + }, + }, + ], + }, + }) + break } }) - this.ipc.on(IpcMessageType.VSCodeCommand, async (_clientId, command) => { - this.outputChannel.appendLine(`[API] VSCodeCommand -> ${command}`) + ipc.on(IpcMessageType.VSCodeCommand, async (_clientId, command) => { + this.log(`[API] VSCodeCommand -> ${command}`) await vscode.commands.executeCommand(command) }) } @@ -164,6 +211,7 @@ export class API extends EventEmitter implements RooCodeAPI { public log(message: string) { this.outputChannel.appendLine(message) + console.log(`${message}\n`) } private registerListeners(provider: ClineProvider) { diff --git a/src/integrations/terminal/Terminal.ts b/src/integrations/terminal/Terminal.ts index a48a898682..6462460620 100644 --- a/src/integrations/terminal/Terminal.ts +++ b/src/integrations/terminal/Terminal.ts @@ -3,7 +3,7 @@ import pWaitFor from "p-wait-for" import { ExitCodeDetails, mergePromise, TerminalProcess, TerminalProcessResultPromise } from "./TerminalProcess" import { truncateOutput, applyRunLengthEncoding } from "../misc/extract-text" -export const 
TERMINAL_SHELL_INTEGRATION_TIMEOUT = 5000 +export const TERMINAL_SHELL_INTEGRATION_TIMEOUT = 15_000 export class Terminal { private static shellIntegrationTimeout: number = TERMINAL_SHELL_INTEGRATION_TIMEOUT @@ -186,7 +186,7 @@ export class Terminal { console.log(`[Terminal ${this.id}] Shell integration not available. Command execution aborted.`) process.emit( "no_shell_integration", - "Shell integration initialization sequence '\\x1b]633;A' was not received within 4 seconds. Shell integration has been disabled for this terminal instance. Increase the timeout in the settings if necessary.", + `Shell integration initialization sequence '\\x1b]633;A' was not received within ${Terminal.shellIntegrationTimeout / 1000}s. Shell integration has been disabled for this terminal instance. Increase the timeout in the settings if necessary.`, ) }) })