Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

# Build artifacts
bin/
!bin/roo-code-latest.vsix
dist/
**/dist/
out/
Expand Down
48 changes: 26 additions & 22 deletions benchmark/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
# docker build -f Dockerfile.base -t roo-code-benchmark-base ..
# docker build -f Dockerfile -t roo-code-benchmark ..
# docker run -d -it -p 3000:3000 -v /tmp/benchmarks.db:/tmp/benchmarks.db roo-code-benchmark
# docker exec -it $(docker ps --filter "ancestor=roo-code-benchmark" -q) /bin/bash

FROM node:20-slim AS base
ENV PNPM_HOME="/pnpm"
ENV PATH="$PNPM_HOME:$PATH"
Expand Down Expand Up @@ -49,34 +44,43 @@ RUN echo 'source $HOME/.cargo/env' >> $HOME/.bashrc
WORKDIR /home/vscode
USER vscode

COPY benchmark/entrypoint.sh /usr/local/bin/entrypoint.sh

# Copy exercises
WORKDIR /home/vscode
RUN git clone https://github.com/cte/Roo-Code-Benchmark.git exercises

# Prepare exercises
WORKDIR /home/vscode/exercises/python
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
RUN /home/vscode/.local/bin/uv sync

# Copy and build extension dependencies
WORKDIR /home/vscode/repo
COPY --chown=vscode:vscode package*.json /home/vscode/repo/
COPY --chown=vscode:vscode webview-ui/package*.json /home/vscode/repo/webview-ui/
RUN npm-run-all -l -p install-extension install-webview

# Copy extension and benchmark code
COPY --chown=vscode:vscode . /home/vscode/repo

# Build extension dependencies
WORKDIR /home/vscode/repo/benchmark

# Install dependencies
COPY --chown=vscode:vscode ./benchmark/package.json ./benchmark/pnpm-lock.yaml ./benchmark/pnpm-workspace.yaml ./benchmark/.npmrc ./
RUN mkdir -p apps/cli apps/web \
config/eslint config/typescript \
packages/db packages/ipc packages/lib packages/types
COPY --chown=vscode:vscode ./benchmark/apps/cli/package.json ./apps/cli/
COPY --chown=vscode:vscode ./benchmark/apps/web/package.json ./apps/web/
COPY --chown=vscode:vscode ./benchmark/config/eslint/package.json ./config/eslint/
COPY --chown=vscode:vscode ./benchmark/config/typescript/package.json ./config/typescript/
COPY --chown=vscode:vscode ./benchmark/packages/db/package.json ./packages/db/
COPY --chown=vscode:vscode ./benchmark/packages/ipc/package.json ./packages/ipc/
COPY --chown=vscode:vscode ./benchmark/packages/lib/package.json ./packages/lib/
COPY --chown=vscode:vscode ./benchmark/packages/types/package.json ./packages/types/
RUN pnpm install

# Initialize database
RUN echo "BENCHMARKS_DB_PATH=file:/tmp/benchmarks.db" > .env
# Copy & install extension
COPY --chown=vscode:vscode ./bin/roo-code-latest.vsix ./
RUN code --debug --install-extension ./roo-code-latest.vsix

# Copy application code
COPY --chown=vscode:vscode ./benchmark ./

# Copy environment variables
COPY --chown=vscode:vscode ./benchmark/.env ./

# Push database schema
RUN pnpm --filter @benchmark/db db:push

EXPOSE 3000
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
CMD ["/usr/bin/pnpm", "dev"]
CMD ["pnpm", "web"]
34 changes: 12 additions & 22 deletions benchmark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,12 @@

## Get Started

[Install](https://docs.docker.com/desktop/) and run Docker Desktop.

Build a container to run the Roo Code benchmarks:
Clone the Roo Code repo:

```sh
docker build -f Dockerfile -t roo-code-benchmark ..
git clone https://github.com/RooVetGit/Roo-Code.git
```

## Local Debugging

Install nvm:

```sh
Expand All @@ -28,28 +24,22 @@ corepack enable pnpm
corepack use pnpm@latest-10
```

Install dependencies:
Build the Roo Code extension:

```sh
pnpm install
npm run install:all
npx vsce package --out bin/roo-code-latest.vsix
code --install-extension bin/roo-code-latest.vsix
```

Configure database:

```sh
cp packages/server/.env.sample packages/server/.env
# Update BENCHMARKS_DB_PATH as needed in `packages/server/.env`.
pnpm --filter @benchmark/db db:push
```
[Install](https://docs.docker.com/desktop/) and run Docker Desktop.

Run the web app:
Build a container to run the Roo Code evals:

```sh
pnpm web
cd benchmark
pnpm install
pnpm docker:start
```

Run an exercise via the cli:

```sh
pnpm cli run [cpp|go|java|javascript|python|rust|all] [<exercise>|all]
```
Navigate to [localhost:3000](http://localhost:3000/) in your browser.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider changing 'Navigation to localhost:3000 in your browser.' to 'Navigate to localhost:3000 in your browser.' for a clearer call to action.

Suggested change
Navigation to [localhost:3000](http://localhost:3000/) in your browser.
Navigate to [localhost:3000](http://localhost:3000/) in your browser.

147 changes: 92 additions & 55 deletions benchmark/apps/cli/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ import { IpcServer, IpcClient } from "@benchmark/ipc"
import { __dirname, extensionDevelopmentPath, exercisesPath } from "./paths.js"
import { getExercises } from "./exercises.js"

const maxConcurrency = 2
const taskTimeLimit = 5 * 60 * 1_000

const testCommands: Record<ExerciseLanguage, { commands: string[]; timeout?: number; cwd?: string }> = {
cpp: { commands: ["cmake -G 'Unix\\ Makefiles' -DEXERCISM_RUN_ALL_TESTS=1 ..", "make"], cwd: "build" }, // timeout 15s bash -c "cd '$dir' && mkdir -p build && cd build && cmake -G 'Unix Makefiles' -DEXERCISM_RUN_ALL_TESTS=1 .. >/dev/null 2>&1 && make >/dev/null 2>&1"
go: { commands: ["go test"] }, // timeout 15s bash -c "cd '$dir' && go test > /dev/null 2>&1"
Expand All @@ -42,8 +45,6 @@ const testCommands: Record<ExerciseLanguage, { commands: string[]; timeout?: num
rust: { commands: ["cargo test"] }, // timeout 15s bash -c "cd '$dir' && cargo test > /dev/null 2>&1"
}

let parentPid: number | undefined = undefined

const run = async (toolbox: GluegunToolbox) => {
const { config, prompt } = toolbox

Expand Down Expand Up @@ -93,6 +94,8 @@ const run = async (toolbox: GluegunToolbox) => {
throw new Error("No tasks found.")
}

console.log(await execa({ cwd: exercisesPath })`git config user.name "Roo Code"`)
console.log(await execa({ cwd: exercisesPath })`git config user.email "[email protected]"`)
console.log(await execa({ cwd: exercisesPath })`git checkout -f`)
console.log(await execa({ cwd: exercisesPath })`git clean -fd`)
console.log(await execa({ cwd: exercisesPath })`git checkout -b runs/${run.id} main`)
Expand All @@ -113,7 +116,6 @@ const run = async (toolbox: GluegunToolbox) => {
// })
// })

const maxConcurrency = 3
const runningPromises: Promise<void>[] = []

const processTask = async (task: Task) => {
Expand Down Expand Up @@ -147,10 +149,11 @@ const run = async (toolbox: GluegunToolbox) => {
await Promise.all(runningPromises)

const result = await finishRun(run.id)
console.log("[cli#run]", result)

if (parentPid) {
console.log(await execa`kill -INT ${parentPid}`)
try {
console.log("[cli#run]", result)
// eslint-disable-next-line @typescript-eslint/no-unused-vars
} catch (error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Avoid empty catch blocks. Instead of commenting out the error log (lines 151-155), consider logging error details using a structured logging method so that errors aren’t silently ignored.

// console.error(error)
}

console.log(await execa({ cwd: exercisesPath })`git add .`)
Expand All @@ -163,18 +166,38 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
const dirname = path.dirname(run.socketPath)
const taskSocketPath = path.resolve(dirname, `${dirname}/task-${task.id}.sock`)

await execa({
env: { ROO_CODE_IPC_SOCKET_PATH: taskSocketPath },
})`code -n ${path.resolve(exercisesPath, language, exercise)}`
const controller = new AbortController()
const cancelSignal = controller.signal

// If debugging:
// Use --wait --log trace or --verbose.
let codeCommand = `code --disable-workspace-trust`
const isDocker = fs.existsSync("/.dockerenv")

console.log(`Connecting to ${taskSocketPath}`)
if (isDocker) {
codeCommand = `xvfb-run --auto-servernum --server-num=1 ${codeCommand} --wait --log trace --disable-gpu --password-store="basic"`
}

const subprocess = execa({
env: {
ROO_CODE_IPC_SOCKET_PATH: taskSocketPath,
},
shell: "/bin/bash",
cancelSignal,
})`${codeCommand} -n ${path.resolve(exercisesPath, language, exercise)}`

// If debugging:
// subprocess.stdout.pipe(process.stdout)

// Give VSCode some time to spawn before connecting to its unix socket.
await new Promise((resolve) => setTimeout(resolve, isDocker ? 5_000 : 1_000))
console.log(`Connecting to ${taskSocketPath} (pid: ${subprocess.pid})`)

const createClient = (taskSocketPath: string) => {
const ipcClient = new IpcClient(taskSocketPath)

ipcClient.on(IpcMessageType.Ack, (ack) => {
console.log(`[cli#runExercise | ${language} / ${exercise}] ack`, ack)
parentPid = ack.ppid
})

return ipcClient
Expand All @@ -185,7 +208,7 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server

while (++tries < 5) {
try {
await pWaitFor(() => client.isReady, { interval: 100, timeout: 2_000 })
await pWaitFor(() => client.isReady, { interval: 100, timeout: 5_000 })
break
} catch (error) {
console.error(error)
Expand All @@ -194,24 +217,19 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
}
}

if (!client.isReady) {
client.disconnect()
console.log(`[cli#runExercise | ${language} / ${exercise}] unable to connect`)
return
}

let isTaskFinished = false
let isClientDisconnected = false

client.on(IpcMessageType.Disconnect, async () => {
console.log(`[cli#runExercise | ${language} / ${exercise}] disconnect`)
// await updateTask(task.id, { finishedAt: new Date() })
isTaskFinished = true
isClientDisconnected = true
})

const ignoreEvents = [
const ignoreEvents: RooCodeEventName[] = [
RooCodeEventName.Message,
RooCodeEventName.TaskTokenUsageUpdated,
RooCodeEventName.TaskAskResponded,
// RooCodeEventName.TaskTokenUsageUpdated,
// RooCodeEventName.TaskAskResponded,
]

let taskStartedAt = Date.now()
Expand All @@ -230,6 +248,7 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server

if (!ignoreEvents.includes(eventName)) {
console.log(`[cli#runExercise | ${language} / ${exercise}] taskEvent -> ${eventName}`)
// console.log(payload)
}

if (eventName === RooCodeEventName.TaskStarted) {
Expand Down Expand Up @@ -278,33 +297,40 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
}
})

client.sendMessage({
type: IpcMessageType.TaskCommand,
origin: IpcOrigin.Client,
clientId: client.clientId!,
data: {
commandName: TaskCommandName.StartNewTask,
if (client.isReady) {
client.sendMessage({
type: IpcMessageType.TaskCommand,
origin: IpcOrigin.Client,
clientId: client.clientId!,
data: {
configuration: {
...rooCodeDefaults,
openRouterApiKey: process.env.OPENROUTER_API_KEY!,
...run.settings,
commandName: TaskCommandName.StartNewTask,
data: {
configuration: {
...rooCodeDefaults,
openRouterApiKey: process.env.OPENROUTER_API_KEY!,
...run.settings,
},
text: prompt,
newTab: true,
},
text: prompt,
newTab: true,
},
},
})
})

console.log(`[cli#runExercise | ${language} / ${exercise}] starting task`)
console.log(`[cli#runExercise | ${language} / ${exercise}] starting task`)
} else {
console.log(`[cli#runExercise | ${language} / ${exercise}] unable to connect`)
client.disconnect()
isTaskFinished = true
isClientDisconnected = true
}

try {
await pWaitFor(() => isTaskFinished, { interval: 1_000, timeout: 1 * 60 * 1_000 })
await pWaitFor(() => isTaskFinished, { interval: 1_000, timeout: taskTimeLimit })
// eslint-disable-next-line @typescript-eslint/no-unused-vars
} catch (error) {
console.log(`[cli#runExercise | ${language} / ${exercise}] time limit reached`)

if (rooTaskId) {
if (rooTaskId && !isClientDisconnected) {
client.sendMessage({
type: IpcMessageType.TaskCommand,
origin: IpcOrigin.Client,
Expand All @@ -318,24 +344,35 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
await updateTask(task.id, { finishedAt: new Date() })
}

try {
client.sendMessage({
type: IpcMessageType.VSCodeCommand,
origin: IpcOrigin.Client,
clientId: client.clientId!,
data: "workbench.action.files.saveFiles",
})
if (!isClientDisconnected) {
try {
client.sendMessage({
type: IpcMessageType.VSCodeCommand,
origin: IpcOrigin.Client,
clientId: client.clientId!,
data: "workbench.action.files.saveFiles",
})

client.sendMessage({
type: IpcMessageType.VSCodeCommand,
origin: IpcOrigin.Client,
clientId: client.clientId!,
data: "workbench.action.closeWindow",
})
client.sendMessage({
type: IpcMessageType.VSCodeCommand,
origin: IpcOrigin.Client,
clientId: client.clientId!,
data: "workbench.action.closeWindow",
})

client.disconnect()
client.disconnect()
} catch (error) {
console.error(error)
}
}

try {
console.log(`[cli#runExercise | ${language} / ${exercise}] aborting subprocess`)
controller.abort()
await subprocess
// eslint-disable-next-line @typescript-eslint/no-unused-vars
} catch (error) {
console.error(error)
// console.error(error)
}
}

Expand Down
Loading