Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Paths excluded from the Docker build context.
# NOTE: a line that starts with "# " followed by a path (e.g. "# .git/") is a
# disabled rule, not an active one: that path is NOT ignored and is still sent
# to the Docker daemon as part of the build context.

# Version control
# (all rules in this group are currently disabled)
# .git/
# .gitignore
# .gitattributes
# .git-blame-ignore-revs
# .gitconfig

# Build artifacts
bin/
# Exception to the bin/ rule above: keep the packaged extension in the build
# context, because evals/Dockerfile copies ./bin/roo-code-latest.vsix.
# This line must stay after "bin/" for the exception to take effect.
!bin/roo-code-latest.vsix
dist/
**/dist/
out/
**/out/

# Dependencies
node_modules/
**/node_modules/
# Test and development files
coverage/
**/.vscode-test/
# Configuration files
# The .env* rule is disabled; evals/Dockerfile copies ./evals/.env, so that
# file has to remain in the build context.
# .env*
knip.json
.husky/
# CI/CD
# (all rules in this group are currently disabled)
# .changeset/
# .github/
# ellipsis.yaml
79 changes: 79 additions & 0 deletions evals/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Image for running the evals web app together with a headless VS Code.
#
# The COPY instructions below reference ./evals/... and ./bin/..., so the build
# context must be the directory that contains both `evals/` and `bin/`
# (the repository root), with this file passed via `-f`.
FROM node:20-slim AS base
ENV PNPM_HOME="/pnpm"
ENV PATH="$PNPM_HOME:$PATH"
RUN corepack enable
RUN npm install -g npm@latest
RUN npm install -g npm-run-all
# Install base system dependencies
RUN apt update && apt install -y sudo curl git vim jq


# Create a `vscode` user with passwordless sudo
RUN useradd -m vscode -s /bin/bash && \
echo "vscode ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/vscode && \
chmod 0440 /etc/sudoers.d/vscode
# Install VS Code from Microsoft's apt repository
# https://code.visualstudio.com/docs/setup/linux
RUN apt install -y wget gpg apt-transport-https
RUN wget -qO- https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > packages.microsoft.gpg
RUN install -D -o root -g root -m 644 packages.microsoft.gpg /etc/apt/keyrings/packages.microsoft.gpg
RUN echo "deb [arch=amd64,arm64,armhf signed-by=/etc/apt/keyrings/packages.microsoft.gpg] https://packages.microsoft.com/repos/code stable main" | tee /etc/apt/sources.list.d/vscode.list > /dev/null
RUN rm -f packages.microsoft.gpg
# Refresh the package lists so the newly added repository is visible
RUN apt update && apt install -y code
# Install Xvfb (virtual X server; the CLI launches VS Code through xvfb-run
# when it detects it is running inside Docker)
RUN apt install -y xvfb
# NOTE: none of the apt packages below are version-pinned, so the installed
# versions are whatever the base image's apt repositories provide.
# [cpp] Install cmake
RUN apt install -y cmake
# [go] Install Go
RUN apt install -y golang-go
# [java] Install the distribution's default Java runtime (JRE package only)
RUN apt install -y default-jre
# [python] Install Python 3 with venv, headers and pip (uv is installed later,
# as the vscode user)
RUN apt install -y python3 python3-venv python3-dev python3-pip
WORKDIR /home/vscode
USER vscode

# [rust] Install Rust via rustup.
# This must run AFTER `USER vscode`: rustup installs into $HOME/.cargo, and when
# run as root the toolchain ends up under /root, where the vscode user (which
# runs everything from here on) cannot use it.
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
RUN echo 'source $HOME/.cargo/env' >> $HOME/.bashrc
# .bashrc is only read by interactive shells; also put cargo's bin directory on
# PATH so cargo/rustc are found by every process started in the container.
ENV PATH="/home/vscode/.cargo/bin:$PATH"

# Clone the evals (exercise) repository into /home/vscode/evals
RUN git clone https://github.com/RooCodeInc/Roo-Code-Evals.git evals

# Prepare evals: install uv for the vscode user and sync the Python project
WORKDIR /home/vscode/evals/python
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
RUN /home/vscode/.local/bin/uv sync

WORKDIR /home/vscode/repo/benchmark

# Install dependencies.
# Only the workspace manifests are copied first so that the `pnpm install`
# layer can be reused from cache when just application source changes.
COPY --chown=vscode:vscode ./evals/package.json ./evals/pnpm-lock.yaml ./evals/pnpm-workspace.yaml ./evals/.npmrc ./
RUN mkdir -p apps/cli apps/web \
config/eslint config/typescript \
packages/db packages/ipc packages/lib packages/types
COPY --chown=vscode:vscode ./evals/apps/cli/package.json ./apps/cli/
COPY --chown=vscode:vscode ./evals/apps/web/package.json ./apps/web/
COPY --chown=vscode:vscode ./evals/config/eslint/package.json ./config/eslint/
COPY --chown=vscode:vscode ./evals/config/typescript/package.json ./config/typescript/
COPY --chown=vscode:vscode ./evals/packages/db/package.json ./packages/db/
COPY --chown=vscode:vscode ./evals/packages/ipc/package.json ./packages/ipc/
COPY --chown=vscode:vscode ./evals/packages/lib/package.json ./packages/lib/
COPY --chown=vscode:vscode ./evals/packages/types/package.json ./packages/types/
RUN pnpm install

# Copy & install extension
COPY --chown=vscode:vscode ./bin/roo-code-latest.vsix ./
RUN code --debug --install-extension ./roo-code-latest.vsix

# Copy application code
COPY --chown=vscode:vscode ./evals ./

# Copy environment variables.
# NOTE(review): this stores the .env file inside an image layer. If it holds
# credentials (the CLI reads OPENROUTER_API_KEY from the environment), anyone
# with access to the image can read them -- consider supplying it at run time
# instead. The build fails here if ./evals/.env does not exist.
COPY --chown=vscode:vscode ./evals/.env ./

# Push database schema
RUN pnpm --filter @evals/db db:push

EXPOSE 3000
CMD ["pnpm", "web"]

64 changes: 47 additions & 17 deletions evals/apps/cli/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -194,12 +194,32 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server

console.log(`${Date.now()} [cli#runExercise] Opening new VS Code window at ${workspacePath}`)

await execa({
const controller = new AbortController()
const cancelSignal = controller.signal

// If debugging:
// Use --wait --log trace or --verbose.
let codeCommand = `code --disable-workspace-trust`
const isDocker = fs.existsSync("/.dockerenv")

if (isDocker) {
codeCommand = `xvfb-run --auto-servernum --server-num=1 ${codeCommand} --wait --log trace --disable-gpu --password-store="basic"`
}

const subprocess = execa({
env: {
ROO_CODE_IPC_SOCKET_PATH: taskSocketPath,
},
shell: "/bin/bash",
})`code --disable-workspace-trust -n ${workspacePath}`
cancelSignal,
})`${codeCommand} -n ${path.resolve(exercisesPath, language, exercise)}`

// If debugging:
// subprocess.stdout.pipe(process.stdout)

// Give VSCode some time to spawn before connecting to its unix socket.
await new Promise((resolve) => setTimeout(resolve, isDocker ? 5_000 : 1_000))
console.log(`Connecting to ${taskSocketPath} (pid: ${subprocess.pid})`)

// Give VSCode some time to spawn before connecting to its unix socket.
await new Promise((resolve) => setTimeout(resolve, 3_000))
Expand Down Expand Up @@ -309,23 +329,30 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server

console.log(`${Date.now()} [cli#runExercise | ${language} / ${exercise}] starting task`)

client.sendMessage({
type: IpcMessageType.TaskCommand,
origin: IpcOrigin.Client,
clientId: client.clientId!,
data: {
commandName: TaskCommandName.StartNewTask,
if (client.isReady) {
client.sendMessage({
type: IpcMessageType.TaskCommand,
origin: IpcOrigin.Client,
clientId: client.clientId!,
data: {
configuration: {
...rooCodeDefaults,
openRouterApiKey: process.env.OPENROUTER_API_KEY!,
...run.settings,
commandName: TaskCommandName.StartNewTask,
data: {
configuration: {
...rooCodeDefaults,
openRouterApiKey: process.env.OPENROUTER_API_KEY!,
...run.settings,
},
text: prompt,
newTab: true,
},
text: prompt,
newTab: true,
},
},
})
})
} else {
console.log(`[cli#runExercise | ${language} / ${exercise}] unable to connect`)
client.disconnect()
taskFinishedAt = Date.now()
isClientDisconnected = true
}

try {
await pWaitFor(() => !!taskFinishedAt || isClientDisconnected, { interval: 1_000, timeout: TASK_TIMEOUT })
Expand Down Expand Up @@ -365,6 +392,9 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
client.disconnect()
}

controller.abort()
await subprocess

return { success: !!taskFinishedAt }
}

Expand Down Expand Up @@ -520,7 +550,7 @@ if (!fs.existsSync(extensionDevelopmentPath)) {

if (!fs.existsSync(exercisesPath)) {
console.error(
`Exercises path does not exist. Please run "git clone https://github.com/cte/Roo-Code-Benchmark.git exercises".`,
`Exercises do not exist at ${exercisesPath}. Please run "git clone https://github.com/RooCodeInc/Roo-Code-Evals.git evals".`,
)
process.exit(1)
}
Expand Down
9 changes: 8 additions & 1 deletion evals/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,14 @@
"build": "turbo build --log-order grouped --output-logs new-only",
"web": "turbo dev --filter @evals/web",
"cli": "turbo dev --filter @evals/cli -- run",
"drizzle:studio": "pnpm --filter @evals/db db:studio"
"drizzle:studio": "pnpm --filter @evals/db db:studio",
"docker:build": "docker build -f Dockerfile -t roo-code-eval --progress=plain ..",
"docker:run": "touch /tmp/evals.db && docker run -d -it -p 3000:3000 -v /tmp/evals.db:/tmp/evals.db roo-code-eval",
"docker:start": "pnpm docker:build && pnpm docker:run",
"docker:shell": "docker exec -it $(docker ps --filter \"ancestor=roo-code-eval\" -q) /bin/bash",
"docker:stop": "docker stop $(docker ps --filter \"ancestor=roo-code-eval\" -q)",
"docker:rm": "docker rm $(docker ps -a --filter \"ancestor=roo-code-eval\" -q)",
"docker:clean": "pnpm docker:stop && pnpm docker:rm"
},
"devDependencies": {
"@dotenvx/dotenvx": "^1.41.0",
Expand Down