diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000..eacfebecb2 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,18 @@ +# Build artifacts +bin/ +!bin/roo-code-latest.vsix +dist/ +**/dist/ +out/ +**/out/ + +# Dependencies +node_modules/ +**/node_modules/ + +# Test and development files +coverage/ +**/.vscode-test/ + +knip.json +.husky/ diff --git a/evals/Dockerfile b/evals/Dockerfile new file mode 100644 index 0000000000..6c4219d762 --- /dev/null +++ b/evals/Dockerfile @@ -0,0 +1,78 @@ +FROM node:20-slim AS base + ENV PNPM_HOME="/pnpm" + ENV PATH="$PNPM_HOME:$PATH" +RUN corepack enable +RUN npm install -g npm@latest +RUN npm install -g npm-run-all +# Install dependencies +RUN apt update && apt install -y sudo curl git vim jq + + +# Create a `vscode` user +RUN useradd -m vscode -s /bin/bash && \ + echo "vscode ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/vscode && \ + chmod 0440 /etc/sudoers.d/vscode +# Install VS Code +# https://code.visualstudio.com/docs/setup/linux +RUN apt install -y wget gpg apt-transport-https +RUN wget -qO- https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > packages.microsoft.gpg +RUN install -D -o root -g root -m 644 packages.microsoft.gpg /etc/apt/keyrings/packages.microsoft.gpg +RUN echo "deb [arch=amd64,arm64,armhf signed-by=/etc/apt/keyrings/packages.microsoft.gpg] https://packages.microsoft.com/repos/code stable main" | tee /etc/apt/sources.list.d/vscode.list > /dev/null +RUN rm -f packages.microsoft.gpg +RUN apt update && apt install -y code +# Install Xvfb +RUN apt install -y xvfb +# [cpp] Install cmake 3.28.3 +RUN apt install -y cmake +# [go] Install Go 1.22.2 +RUN apt install -y golang-go +# [java] Install Java 21 +RUN apt install -y default-jre +# [python] Install Python 3.12.3 and uv 0.6.6 +RUN apt install -y python3 python3-venv python3-dev python3-pip +# [rust] Install Rust 1.85 +RUN curl https://sh.rustup.rs -sSf | bash -s -- -y +RUN echo 'source $HOME/.cargo/env' >> $HOME/.bashrc + WORKDIR /home/vscode + USER vscode + + # Copy evals + RUN git clone https://github.com/RooCodeInc/Roo-Code-Evals.git evals + + # Prepare evals + WORKDIR /home/vscode/evals/python + RUN curl -LsSf https://astral.sh/uv/install.sh | sh + RUN /home/vscode/.local/bin/uv sync + + WORKDIR /home/vscode/repo/benchmark + + # Install dependencies + COPY --chown=vscode:vscode ./evals/package.json ./evals/pnpm-lock.yaml ./evals/pnpm-workspace.yaml ./evals/.npmrc ./ + RUN mkdir -p apps/cli apps/web \ + config/eslint config/typescript \ + packages/db packages/ipc packages/lib packages/types + COPY --chown=vscode:vscode ./evals/apps/cli/package.json ./apps/cli/ + COPY --chown=vscode:vscode ./evals/apps/web/package.json ./apps/web/ + COPY --chown=vscode:vscode ./evals/config/eslint/package.json ./config/eslint/ + COPY --chown=vscode:vscode ./evals/config/typescript/package.json ./config/typescript/ + COPY --chown=vscode:vscode ./evals/packages/db/package.json ./packages/db/ + COPY --chown=vscode:vscode ./evals/packages/ipc/package.json ./packages/ipc/ + COPY --chown=vscode:vscode ./evals/packages/lib/package.json ./packages/lib/ + COPY --chown=vscode:vscode ./evals/packages/types/package.json ./packages/types/ + RUN pnpm install + + # Copy & install extension + COPY --chown=vscode:vscode ./bin/roo-code-latest.vsix ./ + RUN code --debug --install-extension ./roo-code-latest.vsix + + # Copy application code + COPY --chown=vscode:vscode ./evals ./ + + # Copy environment variables + COPY --chown=vscode:vscode ./evals/.env ./ + + # Push database schema + RUN pnpm --filter @evals/db db:push + + EXPOSE 3000 + CMD ["pnpm", "web"] diff --git a/evals/apps/cli/src/index.ts b/evals/apps/cli/src/index.ts index 3bd71c86a7..88ab824b09 100644 --- a/evals/apps/cli/src/index.ts +++ b/evals/apps/cli/src/index.ts @@ -194,12 +194,31 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server console.log(`${Date.now()} [cli#runExercise] Opening new VS Code window at ${workspacePath}`) - await execa({ + const controller = new AbortController() + const cancelSignal = controller.signal + + // If debugging: + // Use --wait --log trace or --verbose. + let codeCommand = `code --disable-workspace-trust` + const isDocker = fs.existsSync("/.dockerenv") + + if (isDocker) { + if (run.concurrency > 1) { + throw new Error("Cannot run multiple tasks in parallel in Docker. Please set concurrency to 1.") + } + codeCommand = `xvfb-run --auto-servernum --server-num=1 ${codeCommand} --wait --log trace --disable-gpu --password-store="basic"` + } + + const subprocess = execa({ env: { ROO_CODE_IPC_SOCKET_PATH: taskSocketPath, }, shell: "/bin/bash", - })`code --disable-workspace-trust -n ${workspacePath}` + cancelSignal, + })`${codeCommand} -n ${workspacePath}` + + // If debugging: + // subprocess.stdout.pipe(process.stdout) // Give VSCode some time to spawn before connecting to its unix socket. await new Promise((resolve) => setTimeout(resolve, 3_000)) @@ -309,23 +328,30 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server console.log(`${Date.now()} [cli#runExercise | ${language} / ${exercise}] starting task`) - client.sendMessage({ - type: IpcMessageType.TaskCommand, - origin: IpcOrigin.Client, - clientId: client.clientId!, - data: { - commandName: TaskCommandName.StartNewTask, + if (client.isReady) { + client.sendMessage({ + type: IpcMessageType.TaskCommand, + origin: IpcOrigin.Client, + clientId: client.clientId!, data: { - configuration: { - ...rooCodeDefaults, - openRouterApiKey: process.env.OPENROUTER_API_KEY!, - ...run.settings, + commandName: TaskCommandName.StartNewTask, + data: { + configuration: { + ...rooCodeDefaults, + openRouterApiKey: process.env.OPENROUTER_API_KEY!, + ...run.settings, + }, + text: prompt, + newTab: true, }, - text: prompt, - newTab: true, }, - }, - }) + }) + } else { + console.log(`[cli#runExercise | ${language} / ${exercise}] unable to connect`) + client.disconnect() + taskFinishedAt = Date.now() + isClientDisconnected = true + } try { await pWaitFor(() => !!taskFinishedAt || isClientDisconnected, { interval: 1_000, timeout: TASK_TIMEOUT }) @@ -365,6 +391,9 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server client.disconnect() } + controller.abort() + await subprocess + return { success: !!taskFinishedAt } } @@ -520,7 +549,7 @@ if (!fs.existsSync(extensionDevelopmentPath)) { if (!fs.existsSync(exercisesPath)) { console.error( - `Exercises path does not exist. Please run "git clone https://github.com/cte/Roo-Code-Benchmark.git exercises".`, + `Exercises do not exist at ${exercisesPath}. Please run "git clone https://github.com/RooCodeInc/Roo-Code-Evals.git evals".`, ) process.exit(1) } diff --git a/evals/package.json b/evals/package.json index baddaec8f0..e243431a45 100644 --- a/evals/package.json +++ b/evals/package.json @@ -10,7 +10,14 @@ "build": "turbo build --log-order grouped --output-logs new-only", "web": "turbo dev --filter @evals/web", "cli": "turbo dev --filter @evals/cli -- run", - "drizzle:studio": "pnpm --filter @evals/db db:studio" + "drizzle:studio": "pnpm --filter @evals/db db:studio", + "docker:build": "docker build -f Dockerfile -t roo-code-eval --progress=plain ..", + "docker:run": "touch /tmp/evals.db && docker run -d -it -p 3000:3000 -v /tmp/evals.db:/tmp/evals.db roo-code-eval", + "docker:start": "pnpm docker:build && pnpm docker:run", + "docker:shell": "docker exec -it $(docker ps --filter \"ancestor=roo-code-eval\" -q) /bin/bash", + "docker:stop": "docker stop $(docker ps --filter \"ancestor=roo-code-eval\" -q)", + "docker:rm": "docker rm $(docker ps -a --filter \"ancestor=roo-code-eval\" -q)", + "docker:clean": "pnpm docker:stop && pnpm docker:rm" }, "devDependencies": { "@dotenvx/dotenvx": "^1.41.0",