From 7757863c50868c560363b8175b63619f4510e5d4 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Thu, 29 May 2025 13:47:49 -0700 Subject: [PATCH 1/7] [Evals] Try to resurrect headless evals with Docker --- .dockerignore | 29 ++++++++++++++ evals/Dockerfile | 79 +++++++++++++++++++++++++++++++++++++ evals/apps/cli/src/index.ts | 64 ++++++++++++++++++++++-------- evals/package.json | 9 ++++- 4 files changed, 163 insertions(+), 18 deletions(-) create mode 100644 .dockerignore create mode 100644 evals/Dockerfile diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000..3bccd1a0ae --- /dev/null +++ b/.dockerignore @@ -0,0 +1,29 @@ +# Version control +# .git/ +# .gitignore +# .gitattributes +# .git-blame-ignore-revs +# .gitconfig + + # Build artifacts + bin/ + !bin/roo-code-latest.vsix + dist/ + **/dist/ + out/ +**/out/ + +# Dependencies +node_modules/ +**/node_modules/ +# Test and development files +coverage/ +**/.vscode-test/ +# Configuration files +# .env* +knip.json +.husky/ +# CI/CD +# .changeset/ +# .github/ +# ellipsis.yaml diff --git a/evals/Dockerfile b/evals/Dockerfile new file mode 100644 index 0000000000..8ad01424db --- /dev/null +++ b/evals/Dockerfile @@ -0,0 +1,79 @@ +FROM node:20-slim AS base + ENV PNPM_HOME="/pnpm" + ENV PATH="$PNPM_HOME:$PATH" +RUN corepack enable +RUN npm install -g npm@latest +RUN npm install -g npm-run-all +# Install dependencies +RUN apt update && apt install -y sudo curl git vim jq + + +# Create a `vscode` user +RUN useradd -m vscode -s /bin/bash && \ + echo "vscode ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/vscode && \ + chmod 0440 /etc/sudoers.d/vscode +# Install VS Code +# https://code.visualstudio.com/docs/setup/linux +RUN apt install -y wget gpg apt-transport-https +RUN wget -qO- https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > packages.microsoft.gpg +RUN install -D -o root -g root -m 644 packages.microsoft.gpg /etc/apt/keyrings/packages.microsoft.gpg +RUN echo "deb [arch=amd64,arm64,armhf signed-by=/etc/apt/keyrings/packages.microsoft.gpg] https://packages.microsoft.com/repos/code stable main" | tee /etc/apt/sources.list.d/vscode.list > /dev/null +RUN rm -f packages.microsoft.gpg +RUN apt update && apt install -y code +# Install Xvfb +RUN apt install -y xvfb +# [cpp] Install cmake 3.28.3 +RUN apt install -y cmake +# [go] Install Go 1.22.2 +RUN apt install -y golang-go +# [java] Install Java 21 +RUN apt install -y default-jre +# [python] Install Python 3.12.3 and uv 0.6.6 +RUN apt install -y python3 python3-venv python3-dev python3-pip +# [rust] Install Rust 1.85 +RUN curl https://sh.rustup.rs -sSf | bash -s -- -y +RUN echo 'source $HOME/.cargo/env' >> $HOME/.bashrc + WORKDIR /home/vscode + USER vscode + + # Copy exercises + RUN git clone https://github.com/RooCodeInc/Roo-Code-Evals.git exercises + + # Prepare exercises + WORKDIR /home/vscode/exercises/python + RUN curl -LsSf https://astral.sh/uv/install.sh | sh + RUN /home/vscode/.local/bin/uv sync + + WORKDIR /home/vscode/repo/benchmark + + # Install dependencies + COPY --chown=vscode:vscode ./evals/package.json ./evals/pnpm-lock.yaml ./evals/pnpm-workspace.yaml ./evals/.npmrc ./ + RUN mkdir -p apps/cli apps/web \ + config/eslint config/typescript \ + packages/db packages/ipc packages/lib packages/types + COPY --chown=vscode:vscode ./evals/apps/cli/package.json ./apps/cli/ + COPY --chown=vscode:vscode ./evals/apps/web/package.json ./apps/web/ + COPY --chown=vscode:vscode ./evals/config/eslint/package.json ./config/eslint/ + COPY --chown=vscode:vscode ./evals/config/typescript/package.json ./config/typescript/ + COPY --chown=vscode:vscode ./evals/packages/db/package.json ./packages/db/ + COPY --chown=vscode:vscode ./evals/packages/ipc/package.json ./packages/ipc/ + COPY --chown=vscode:vscode ./evals/packages/lib/package.json ./packages/lib/ + COPY --chown=vscode:vscode ./evals/packages/types/package.json ./packages/types/ + RUN pnpm install + + # Copy & install extension + COPY --chown=vscode:vscode ./bin/roo-code-latest.vsix ./ + RUN code --debug --install-extension ./roo-code-latest.vsix + + # Copy application code + COPY --chown=vscode:vscode ./evals ./ + + # Copy environment variables + COPY --chown=vscode:vscode ./evals/.env ./ + + # Push database schema + RUN pnpm --filter @evals/db db:push + + EXPOSE 3000 + CMD ["pnpm", "web"] + \ No newline at end of file diff --git a/evals/apps/cli/src/index.ts b/evals/apps/cli/src/index.ts index 3bd71c86a7..a1856729ac 100644 --- a/evals/apps/cli/src/index.ts +++ b/evals/apps/cli/src/index.ts @@ -194,12 +194,32 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server console.log(`${Date.now()} [cli#runExercise] Opening new VS Code window at ${workspacePath}`) - await execa({ + const controller = new AbortController() + const cancelSignal = controller.signal + + // If debugging: + // Use --wait --log trace or --verbose. + let codeCommand = `code --disable-workspace-trust` + const isDocker = fs.existsSync("/.dockerenv") + + if (isDocker) { + codeCommand = `xvfb-run --auto-servernum --server-num=1 ${codeCommand} --wait --log trace --disable-gpu --password-store="basic"` + } + + const subprocess = execa({ env: { ROO_CODE_IPC_SOCKET_PATH: taskSocketPath, }, shell: "/bin/bash", - })`code --disable-workspace-trust -n ${workspacePath}` + cancelSignal, + })`${codeCommand} -n ${path.resolve(exercisesPath, language, exercise)}` + + // If debugging: + // subprocess.stdout.pipe(process.stdout) + + // Give VSCode some time to spawn before connectint to its unix socket. + await new Promise((resolve) => setTimeout(resolve, isDocker ? 5_000 : 1_000)) + console.log(`Connecting to ${taskSocketPath} (pid: ${subprocess.pid})`) // Give VSCode some time to spawn before connecting to its unix socket. await new Promise((resolve) => setTimeout(resolve, 3_000)) @@ -309,23 +329,30 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server console.log(`${Date.now()} [cli#runExercise | ${language} / ${exercise}] starting task`) - client.sendMessage({ - type: IpcMessageType.TaskCommand, - origin: IpcOrigin.Client, - clientId: client.clientId!, - data: { - commandName: TaskCommandName.StartNewTask, + if (client.isReady) { + client.sendMessage({ + type: IpcMessageType.TaskCommand, + origin: IpcOrigin.Client, + clientId: client.clientId!, data: { - configuration: { - ...rooCodeDefaults, - openRouterApiKey: process.env.OPENROUTER_API_KEY!, - ...run.settings, + commandName: TaskCommandName.StartNewTask, + data: { + configuration: { + ...rooCodeDefaults, + openRouterApiKey: process.env.OPENROUTER_API_KEY!, + ...run.settings, + }, + text: prompt, + newTab: true, }, - text: prompt, - newTab: true, }, - }, - }) + }) + } else { + console.log(`[cli#runExercise | ${language} / ${exercise}] unable to connect`) + client.disconnect() + taskFinishedAt = Date.now() + isClientDisconnected = true + } try { await pWaitFor(() => !!taskFinishedAt || isClientDisconnected, { interval: 1_000, timeout: TASK_TIMEOUT }) @@ -365,6 +392,9 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server client.disconnect() } + controller.abort() + await subprocess + return { success: !!taskFinishedAt } } @@ -520,7 +550,7 @@ if (!fs.existsSync(extensionDevelopmentPath)) { if (!fs.existsSync(exercisesPath)) { console.error( - `Exercises path does not exist. Please run "git clone https://github.com/cte/Roo-Code-Benchmark.git exercises".`, + `Exercises path does not exist. Please run "git clone https://github.com/RooCodeInc/Roo-Code-Evals.git exercises".`, ) process.exit(1) } diff --git a/evals/package.json b/evals/package.json index baddaec8f0..e243431a45 100644 --- a/evals/package.json +++ b/evals/package.json @@ -10,7 +10,14 @@ "build": "turbo build --log-order grouped --output-logs new-only", "web": "turbo dev --filter @evals/web", "cli": "turbo dev --filter @evals/cli -- run", - "drizzle:studio": "pnpm --filter @evals/db db:studio" + "drizzle:studio": "pnpm --filter @evals/db db:studio", + "docker:build": "docker build -f Dockerfile -t roo-code-eval --progress=plain ..", + "docker:run": "touch /tmp/evals.db && docker run -d -it -p 3000:3000 -v /tmp/evals.db:/tmp/evals.db roo-code-eval", + "docker:start": "pnpm docker:build && pnpm docker:run", + "docker:shell": "docker exec -it $(docker ps --filter \"ancestor=roo-code-eval\" -q) /bin/bash", + "docker:stop": "docker stop $(docker ps --filter \"ancestor=roo-code-eval\" -q)", + "docker:rm": "docker rm $(docker ps -a --filter \"ancestor=roo-code-eval\" -q)", + "docker:clean": "pnpm docker:stop && pnpm docker:rm" }, "devDependencies": { "@dotenvx/dotenvx": "^1.41.0", From fda0972439fd28cfe850549568ace7a102e97dff Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Thu, 29 May 2025 14:02:15 -0700 Subject: [PATCH 2/7] fixes; now it runs --- evals/Dockerfile | 8 ++++---- evals/apps/cli/src/index.ts | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/evals/Dockerfile b/evals/Dockerfile index 8ad01424db..82a6772bba 100644 --- a/evals/Dockerfile +++ b/evals/Dockerfile @@ -36,11 +36,11 @@ RUN echo 'source $HOME/.cargo/env' >> $HOME/.bashrc WORKDIR /home/vscode USER vscode - # Copy exercises - RUN git clone https://github.com/RooCodeInc/Roo-Code-Evals.git exercises + # Copy evals + RUN git clone https://github.com/RooCodeInc/Roo-Code-Evals.git evals - # Prepare exercises - WORKDIR /home/vscode/exercises/python + # Prepare evals + WORKDIR /home/vscode/evals/python RUN curl -LsSf https://astral.sh/uv/install.sh | sh RUN /home/vscode/.local/bin/uv sync diff --git a/evals/apps/cli/src/index.ts b/evals/apps/cli/src/index.ts index a1856729ac..5ccdfe3e38 100644 --- a/evals/apps/cli/src/index.ts +++ b/evals/apps/cli/src/index.ts @@ -550,7 +550,7 @@ if (!fs.existsSync(extensionDevelopmentPath)) { if (!fs.existsSync(exercisesPath)) { console.error( - `Exercises path does not exist. Please run "git clone https://github.com/RooCodeInc/Roo-Code-Evals.git exercises".`, + `Exercises do not exist at ${exercisesPath}. Please run "git clone https://github.com/RooCodeInc/Roo-Code-Evals.git evals".`, ) process.exit(1) } From f8767552a72c613d5b567c9157d848f41dadc686 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Thu, 29 May 2025 15:04:44 -0700 Subject: [PATCH 3/7] rm --- evals/apps/cli/src/index.ts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/evals/apps/cli/src/index.ts b/evals/apps/cli/src/index.ts index 5ccdfe3e38..e13a49621c 100644 --- a/evals/apps/cli/src/index.ts +++ b/evals/apps/cli/src/index.ts @@ -217,10 +217,6 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server // If debugging: // subprocess.stdout.pipe(process.stdout) - // Give VSCode some time to spawn before connectint to its unix socket. - await new Promise((resolve) => setTimeout(resolve, isDocker ? 5_000 : 1_000)) - console.log(`Connecting to ${taskSocketPath} (pid: ${subprocess.pid})`) - // Give VSCode some time to spawn before connecting to its unix socket. await new Promise((resolve) => setTimeout(resolve, 3_000)) console.log(`${Date.now()} [cli#runExercise] Connecting to ${taskSocketPath}`) From 0da4f8d74c6b366893470996d02b644187fe96bf Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Thu, 29 May 2025 15:08:30 -0700 Subject: [PATCH 4/7] remove commented out files --- .dockerignore | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/.dockerignore b/.dockerignore index 3bccd1a0ae..eacfebecb2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,29 +1,18 @@ -# Version control -# .git/ -# .gitignore -# .gitattributes -# .git-blame-ignore-revs -# .gitconfig - - # Build artifacts - bin/ - !bin/roo-code-latest.vsix - dist/ - **/dist/ - out/ +# Build artifacts +bin/ +!bin/roo-code-latest.vsix +dist/ +**/dist/ +out/ **/out/ # Dependencies node_modules/ **/node_modules/ + # Test and development files coverage/ **/.vscode-test/ -# Configuration files -# .env* + knip.json .husky/ -# CI/CD -# .changeset/ -# .github/ -# ellipsis.yaml From ac893871ef7cc0607e3fd297d9b6bf3dd096e2c0 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Thu, 29 May 2025 15:09:21 -0700 Subject: [PATCH 5/7] remove trailing space --- evals/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/evals/Dockerfile b/evals/Dockerfile index 82a6772bba..6c4219d762 100644 --- a/evals/Dockerfile +++ b/evals/Dockerfile @@ -76,4 +76,3 @@ RUN echo 'source $HOME/.cargo/env' >> $HOME/.bashrc EXPOSE 3000 CMD ["pnpm", "web"] - \ No newline at end of file From 0317a5a3d1a730b7da1ddf3f0b233b5b74c1fe48 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Thu, 29 May 2025 15:11:59 -0700 Subject: [PATCH 6/7] nit --- evals/apps/cli/src/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evals/apps/cli/src/index.ts b/evals/apps/cli/src/index.ts index e13a49621c..013da564e2 100644 --- a/evals/apps/cli/src/index.ts +++ b/evals/apps/cli/src/index.ts @@ -212,7 +212,7 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server }, shell: "/bin/bash", cancelSignal, - })`${codeCommand} -n ${path.resolve(exercisesPath, language, exercise)}` + })`${codeCommand} -n ${workspacePath}` // If debugging: // subprocess.stdout.pipe(process.stdout) From a30241480a356814d518b64aa27c31b0cd0a01f6 Mon Sep 17 00:00:00 2001 From: Canyon Robins Date: Thu, 29 May 2025 15:29:43 -0700 Subject: [PATCH 7/7] throw error on concurrency attempt --- evals/apps/cli/src/index.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/evals/apps/cli/src/index.ts b/evals/apps/cli/src/index.ts index 013da564e2..88ab824b09 100644 --- a/evals/apps/cli/src/index.ts +++ b/evals/apps/cli/src/index.ts @@ -203,6 +203,9 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server const isDocker = fs.existsSync("/.dockerenv") if (isDocker) { + if (run.concurrency > 1) { + throw new Error("Cannot run multiple tasks in parallel in Docker. Please set concurrency to 1.") + } codeCommand = `xvfb-run --auto-servernum --server-num=1 ${codeCommand} --wait --log trace --disable-gpu --password-store="basic"` }