Skip to content

Commit 3298c41

Browse files
committed
More progress
1 parent 6082ec2 commit 3298c41

File tree

4 files changed

+13
-11
lines changed

4 files changed

+13
-11
lines changed

benchmark/.env.sample

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
 BENCHMARKS_DB_PATH=file:/tmp/benchmarks.db
 OPENROUTER_API_KEY=sk-or-v1-...
-POSTHOG_API_KEY=phc_...

benchmark/README.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ Install nvm:
 curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.2/install.sh | bash
 # Reload shell.
 nvm install
+node --version
+# Verify the version is v20.18.1.
 ```

 Install pnpm:
@@ -29,7 +31,6 @@ Build the Roo Code extension:
 ```sh
 npm run install:all
 npx vsce package --out bin/roo-code-latest.vsix
-code --install-extension bin/roo-code-latest.vsix
 ```

 [Install](https://docs.docker.com/desktop/) and run Docker Desktop.
@@ -39,7 +40,9 @@ Build a container to run the Roo Code evals:
 ```sh
 cd benchmark
 pnpm install
+cp .env.sample .env
+# Update OPENROUTER_API_KEY=... with your actual API key.
 pnpm docker:start
 ```

-Navigation to [localhost:3000](http://localhost:3000/) in your browser.
+Navigate to [localhost:3000](http://localhost:3000/) in your browser.

benchmark/apps/cli/src/index.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -227,9 +227,9 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
 })

 const ignoreEvents: RooCodeEventName[] = [
-RooCodeEventName.Message,
-// RooCodeEventName.TaskTokenUsageUpdated,
-// RooCodeEventName.TaskAskResponded,
+// RooCodeEventName.Message,
+RooCodeEventName.TaskTokenUsageUpdated,
+RooCodeEventName.TaskAskResponded,
 ]

 let taskStartedAt = Date.now()
@@ -248,7 +248,7 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server

 if (!ignoreEvents.includes(eventName)) {
 console.log(`[cli#runExercise | ${language} / ${exercise}] taskEvent -> ${eventName}`)
-// console.log(payload)
+console.log(payload)
 }

 if (eventName === RooCodeEventName.TaskStarted) {

benchmark/package.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@
 "cli": "turbo dev --filter @benchmark/cli --output-logs new-only --ui tui -- run",
 "drizzle:studio": "pnpm --filter @benchmark/db db:studio",
 "docker:build": "docker build -f Dockerfile -t roo-code-benchmark --progress=plain ..",
-"docker:run": "touch /tmp/benchmarks.db && docker run -d -it -p 3000:3000 -v /tmp/benchmarks.db:/tmp/benchmarks.db roo-code-benchmark",
+"docker:run": "docker run -d -it -p 3000:3000 -v /tmp/benchmarks.db:/tmp/benchmarks.db roo-code-benchmark",
 "docker:start": "pnpm docker:build && pnpm docker:run",
-"docker:shell": "docker exec -it $(docker ps --filter \"ancestor=roo-code-benchmark\" -q) /bin/bash",
-"docker:stop": "docker stop $(docker ps --filter \"ancestor=roo-code-benchmark\" -q)",
-"docker:rm": "docker rm $(docker ps -a --filter \"ancestor=roo-code-benchmark\" -q)",
+"docker:shell": "docker exec -it $(docker ps --filter \"ancestor=roo-code-benchmark\" -q) /bin/zsh",
+"docker:stop": "docker stop $(docker ps --filter \"ancestor=roo-code-benchmark\" -q) >/dev/null 2>&1 || true",
+"docker:rm": "docker rm $(docker ps -a --filter \"ancestor=roo-code-benchmark\" -q) >/dev/null 2>&1 || true",
 "docker:clean": "pnpm docker:stop && pnpm docker:rm"
 },
 "devDependencies": {

0 commit comments

Comments
 (0)