Skip to content

Commit d26f737

Browse files
committed
More progress
1 parent 9fd086f commit d26f737

File tree

9 files changed

+1392
-103
lines changed

9 files changed

+1392
-103
lines changed

benchmark/package-lock.json

Lines changed: 1214 additions & 51 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

benchmark/package.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@
77
"lint": "eslint src --ext ts",
88
"check-types": "tsc --noEmit",
99
"build": "rm -rf out && tsc -p tsconfig.json",
10-
"benchmark": "xvbf-run node out/run.js",
10+
"benchmark": "npx dotenvx run -f .env.local -- node out/run.js",
11+
"cli": "tsx src/cli.ts",
1112
"clean": "rimraf out",
13+
"clean:exercises": "cd exercises && git checkout -f && git clean -fd",
1214
"docker:base:build": "docker build -f Dockerfile.base -t roo-code-benchmark-base ..",
1315
"docker:build": "npm run docker:base:build && docker build -f Dockerfile -t roo-code-benchmark ..",
1416
"docker:run": "docker run -d -it roo-code-benchmark",
@@ -22,13 +24,14 @@
2224
"docker:dev:run": "docker run -d -p 2222:22 -v ~/Documents/Roo-Code:/home/vscode/repo -it roo-code-benchmark-dev",
2325
"docker:dev:start": "npm run docker:dev:build && npm run docker:dev:run",
2426
"docker:dev:shell": "docker exec -it $(docker ps --filter \"ancestor=roo-code-benchmark-dev\" -q) /bin/bash",
25-
"docker:dev:benchmark": "docker exec -it $(docker ps --filter \"ancestor=roo-code-benchmark-dev\" -q) npm run benchmark --",
27+
"docker:dev:benchmark": "docker exec -it $(docker ps --filter \"ancestor=roo-code-benchmark-dev\" -q) xvfb-run npm run benchmark --",
2628
"docker:dev:stop": "docker stop $(docker ps --filter \"ancestor=roo-code-benchmark-dev\" -q)",
2729
"docker:dev:rm": "docker rm $(docker ps -a --filter \"ancestor=roo-code-benchmark-dev\" -q)",
2830
"docker:dev:clean": "npm run docker:dev:stop && npm run docker:dev:rm"
2931
},
3032
"devDependencies": {
3133
"@vscode/test-electron": "^2.4.0",
34+
"gluegun": "^5.1.2",
3235
"tsx": "^4.19.3",
3336
"typescript": "^5.4.5",
3437
"yargs": "^17.7.2"

benchmark/prompts/javascript.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
Your job is to complete a coding exercise described by `.docs/instructions.md`.
2+
3+
A file with the implementation stubbed out has been created for you, along with a test file.
4+
5+
To successfully complete the exercise, you must pass all the tests in the test file.
6+
7+
To confirm that your solution is correct, run the tests with `npm test`.
8+
9+
Before running the tests, make sure your environment is set up by running `npm install` to install the dependencies.

benchmark/prompts/typescript.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
Your job is to complete a coding exercise described by `.docs/instructions.md`.
2+
3+
A file with the implementation stubbed out has been created for you, along with a test file.
4+
5+
To successfully complete the exercise, you must pass all the tests in the test file.
6+
7+
To confirm that your solution is correct, run the tests with `yarn test`. Do not meaningfully alter the tests. You should enable the tests if they are disabled (e.g. change `xtest` to `test` for jest).
8+
9+
Before running the tests, make sure your environment is set up by running `corepack enable` to enable yarn and `yarn install` to install the dependencies.

benchmark/src/cli.ts

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import * as fs from "fs"
2+
import * as path from "path"
3+
import { execSync } from "child_process"
4+
5+
import { build } from "gluegun"
6+
7+
async function main() {
8+
const cli = build()
9+
.brand("roo-code-benchmark-runner")
10+
.src(__dirname)
11+
.help()
12+
.version()
13+
.defaultCommand() // Use the default command if no args.
14+
.create()
15+
16+
const { print, prompt } = await cli.run(process.argv)
17+
18+
try {
19+
const exercisesPath = path.resolve(__dirname, "../../benchmark/exercises")
20+
21+
if (!fs.existsSync(exercisesPath)) {
22+
print.error(`Error: Exercises directory not found at ${exercisesPath}`)
23+
process.exit(1)
24+
}
25+
26+
const languages = fs
27+
.readdirSync(exercisesPath, { withFileTypes: true })
28+
.filter((entry) => entry.isDirectory() && !entry.name.startsWith("."))
29+
.map((dir) => dir.name)
30+
31+
if (languages.length === 0) {
32+
print.error("No language directories found in the exercises folder")
33+
process.exit(1)
34+
}
35+
36+
const { selectedLanguage } = await prompt.ask({
37+
type: "select",
38+
name: "selectedLanguage",
39+
message: "Select a language:",
40+
choices: languages,
41+
})
42+
43+
const languagePath = path.join(exercisesPath, selectedLanguage)
44+
45+
if (!fs.existsSync(languagePath)) {
46+
print.error(`Error: Language directory not found at ${languagePath}`)
47+
process.exit(1)
48+
}
49+
50+
const exercisesForLanguage = fs
51+
.readdirSync(languagePath, { withFileTypes: true })
52+
.filter((entry) => entry.isDirectory() && !entry.name.startsWith("."))
53+
.map((dir) => dir.name)
54+
55+
if (exercisesForLanguage.length === 0) {
56+
print.error(`No exercises found for ${selectedLanguage}`)
57+
process.exit(1)
58+
}
59+
60+
const { selectedExercise } = await prompt.ask({
61+
type: "select",
62+
name: "selectedExercise",
63+
message: "Select an exercise:",
64+
choices: exercisesForLanguage,
65+
})
66+
67+
const exercisePath = `exercises/${selectedLanguage}/${selectedExercise}`
68+
print.info(`Running ${exercisePath}...`)
69+
70+
try {
71+
const benchmarkDir = path.resolve(__dirname, "../../benchmark")
72+
73+
const spinner = print.spin("Building...")
74+
execSync("npm run build", { stdio: "inherit", cwd: benchmarkDir })
75+
spinner.succeed("Build completed")
76+
77+
print.info(`Running exercise: ${exercisePath}`)
78+
const runCommand = `npm run benchmark -- -e ${exercisePath}`
79+
execSync(runCommand, { stdio: "inherit", cwd: benchmarkDir })
80+
process.exit(0)
81+
} catch (error) {
82+
print.error(error)
83+
process.exit(1)
84+
}
85+
} catch (error) {
86+
print.error(error)
87+
process.exit(1)
88+
}
89+
}
90+
91+
main()

benchmark/src/run.ts

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import * as path from "path"
2+
import * as fs from "fs/promises"
23

34
import yargs from "yargs"
45
import { hideBin } from "yargs/helpers"
56
import { runTests } from "@vscode/test-electron"
67

78
async function main() {
9+
// Example usage: npm run build && npx dotenvx run -f .env.local -- node ./out/run.js -e exercises/javascript/binary
810
const argv = await yargs(hideBin(process.argv))
911
.option("exercise", {
1012
alias: "e",
@@ -16,25 +18,15 @@ async function main() {
1618

1719
const extensionDevelopmentPath = path.resolve(__dirname, "../../")
1820
const extensionTestsPath = path.resolve(__dirname, "./runExercise")
19-
const testWorkspace = path.resolve(__dirname, argv.exercise)
21+
const testWorkspace = path.resolve(__dirname, "..", argv.exercise)
22+
const language = path.basename(path.dirname(testWorkspace))
23+
const prompt = await fs.readFile(path.resolve(__dirname, "..", `prompts/${language}.md`), "utf-8")
2024

2125
await runTests({
2226
extensionDevelopmentPath: extensionDevelopmentPath,
2327
extensionTestsPath: extensionTestsPath,
2428
launchArgs: [testWorkspace, "--disable-extensions"],
25-
extensionTestsEnv: {
26-
prompt: `
27-
Your job is to complete a coding exercise described by \`.docs/instructions.md\`.
28-
A file with the implementation stubbed out has been created for you, along with a test file.
29-
To successfully complete the exercise, you must pass all the tests in the test file.
30-
To confirm that your solution is correct, run the tests with \`yarn test\`.
31-
Before running the test make sure your environment is set up by running \`corepack enable\` to enable yarn and \`yarn install\` to install the dependencies.
32-
`
33-
.trim()
34-
.split("\n")
35-
.map((line) => line.trim())
36-
.join("\n"),
37-
},
29+
extensionTestsEnv: { prompt },
3830
})
3931
}
4032

benchmark/src/runExercise.ts

Lines changed: 34 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ import { RooCodeAPI } from "../../src/exports/roo-code"
44

55
import { waitFor } from "./utils"
66

7+
const TIME_LIMIT = 300_000
8+
const STALL_LIMIT = 60_000
9+
710
export async function run() {
811
const extension = vscode.extensions.getExtension<RooCodeAPI>("RooVeterinaryInc.roo-cline")
912

@@ -12,49 +15,52 @@ export async function run() {
1215
}
1316

1417
const api = extension.isActive ? extension.exports : await extension.activate()
15-
16-
await api.sidebarProvider.updateGlobalState("apiProvider", "openrouter")
17-
await api.sidebarProvider.updateGlobalState("openRouterModelId", "anthropic/claude-3.7-sonnet")
18-
await api.sidebarProvider.updateGlobalState("autoApprovalEnabled", true)
19-
await api.sidebarProvider.updateGlobalState("alwaysAllowReadOnly", true)
20-
await api.sidebarProvider.updateGlobalState("alwaysAllowWrite", true)
21-
await api.sidebarProvider.updateGlobalState("alwaysAllowExecute", true)
22-
await api.sidebarProvider.updateGlobalState("alwaysAllowBrowser", true)
23-
await api.sidebarProvider.updateGlobalState("alwaysApproveResubmit", true)
24-
await api.sidebarProvider.updateGlobalState("alwaysAllowMcp", true)
25-
await api.sidebarProvider.updateGlobalState("alwaysAllowModeSwitch", true)
26-
27-
await api.sidebarProvider.storeSecret("openRouterApiKey", process.env.OPENROUTER_API_KEY!)
18+
const provider = api.sidebarProvider
19+
await provider.updateGlobalState("apiProvider", "openrouter")
20+
await provider.updateGlobalState("openRouterModelId", "anthropic/claude-3.7-sonnet")
21+
await provider.updateGlobalState("autoApprovalEnabled", true)
22+
await provider.updateGlobalState("alwaysAllowReadOnly", true)
23+
await provider.updateGlobalState("alwaysAllowWrite", true)
24+
await provider.updateGlobalState("alwaysAllowExecute", true)
25+
await provider.updateGlobalState("alwaysAllowBrowser", true)
26+
await provider.updateGlobalState("alwaysApproveResubmit", true)
27+
await provider.updateGlobalState("alwaysAllowMcp", true)
28+
await provider.updateGlobalState("alwaysAllowModeSwitch", true)
29+
await provider.storeSecret("openRouterApiKey", process.env.OPENROUTER_API_KEY!)
2830

2931
await vscode.workspace
3032
.getConfiguration("roo-cline")
3133
.update("allowedCommands", ["*"], vscode.ConfigurationTarget.Global)
3234

33-
await api.sidebarProvider.resolveWebviewView(
34-
vscode.window.createWebviewPanel("roo-cline.SidebarProvider", "Roo Code", vscode.ViewColumn.One, {
35-
enableScripts: true,
36-
enableCommandUris: true,
37-
retainContextWhenHidden: true,
38-
localResourceRoots: [extension.extensionUri],
39-
}),
40-
)
41-
42-
await waitFor(() => api.sidebarProvider.viewLaunched)
35+
await vscode.commands.executeCommand("roo-cline.SidebarProvider.focus")
36+
await waitFor(() => provider.viewLaunched)
4337

4438
await api.startNewTask(process.env.prompt!)
4539

4640
let cursor = 0
4741

42+
const startTime = Date.now()
43+
4844
const getMessage = async () => {
49-
await waitFor(() => api.sidebarProvider.messages.length > cursor, { timeout: 120_000 }).catch(() => {})
50-
return api.sidebarProvider.messages[cursor++]
45+
await waitFor(() => provider.messages.length > cursor, { timeout: STALL_LIMIT })
46+
return provider.messages[cursor++]
5147
}
5248

5349
while (true) {
54-
const message = await getMessage()
55-
console.log("message = ", message)
50+
try {
51+
const message = await getMessage()
52+
console.log("message = ", message)
53+
54+
if (!message || message.say === "completion_result") {
55+
break
56+
}
57+
} catch (e) {
58+
console.error(e)
59+
break
60+
}
5661

57-
if (!message || message.say === "completion_result") {
62+
if (Date.now() - startTime > TIME_LIMIT) {
63+
console.log("Time's up!")
5864
break
5965
}
6066
}

benchmark/src/utils.ts

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,30 @@ type WaitForOptions = {
33
interval?: number
44
}
55

6-
export const waitFor = (condition: () => boolean, { timeout = 10_000, interval = 250 }: WaitForOptions = {}) =>
7-
Promise.race([
6+
export const waitFor = (condition: () => boolean, { timeout = 10_000, interval = 250 }: WaitForOptions = {}) => {
7+
let timeoutId: NodeJS.Timeout | undefined = undefined
8+
9+
return Promise.race([
810
new Promise<void>((resolve) => {
9-
const check = () => (condition() ? resolve() : setTimeout(check, interval))
11+
const check = () => {
12+
if (condition()) {
13+
if (timeoutId) {
14+
clearTimeout(timeoutId)
15+
timeoutId = undefined
16+
}
17+
18+
resolve()
19+
} else {
20+
setTimeout(check, interval)
21+
}
22+
}
23+
1024
check()
1125
}),
12-
new Promise((_, reject) =>
13-
setTimeout(() => {
14-
console.log(`Timeout after ${Math.floor(timeout / 1000)}s`)
26+
new Promise((_, reject) => {
27+
timeoutId = setTimeout(() => {
1528
reject(new Error(`Timeout after ${Math.floor(timeout / 1000)}s`))
16-
}, timeout),
17-
),
29+
}, timeout)
30+
}),
1831
])
32+
}

src/exports/roo-code.d.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import * as vscode from "vscode"
2+
13
export interface RooCodeAPI {
24
/**
35
* Sets the custom instructions in the global storage.

0 commit comments

Comments
 (0)