Skip to content

Commit 39d6535

Browse files
authored
Fix rust evals (run cargo with bash) + logging tweaks (#4392)
1 parent 9381871 commit 39d6535

File tree

7 files changed

+124
-128
lines changed

7 files changed

+124
-128
lines changed

packages/evals/src/cli/index.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ import { runEvals } from "./runEvals.js"
88
import { processTask } from "./processTask.js"
99

1010
const main = async () => {
11-
const result = await run(
11+
await run(
1212
command({
1313
name: "cli",
1414
description: "Execute an eval run.",
@@ -43,7 +43,6 @@ const main = async () => {
4343
process.argv.slice(2),
4444
)
4545

46-
console.log(result)
4746
process.exit(0)
4847
}
4948

packages/evals/src/cli/processTask.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ export const processTask = async (taskId: number) => {
2525
await runTask({ run, task, publish })
2626

2727
console.log(`[${Date.now()} | ${tag}] testing task ${task.id} (${task.language}/${task.exercise})...`)
28-
const passed = await runUnitTest({ task })
28+
const passed = await runUnitTest({ run, task })
2929

3030
console.log(`[${Date.now()} | ${tag}] task ${task.id} (${task.language}/${task.exercise}) -> ${passed}`)
3131
await updateTask(task.id, { passed })

packages/evals/src/cli/runEvals.ts

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
1-
import { execa } from "execa"
21
import PQueue from "p-queue"
32

43
import { findRun, finishRun, getTasks } from "../db/index.js"
54
import { exercisesPath } from "../exercises/index.js"
65

7-
import { getTag, isDockerContainer } from "./utils.js"
6+
import { getTag, isDockerContainer, resetEvalsRepo, commitEvalsRepoChanges } from "./utils.js"
87
import { processTask, processTaskInContainer } from "./processTask.js"
98
import { startHeartbeat, stopHeartbeat } from "./redis.js"
109

@@ -24,31 +23,32 @@ export const runEvals = async (runId: number) => {
2423
const tag = getTag("runEvals", { run })
2524
console.log(`[${Date.now()} | ${tag}] running ${tasks.length} task(s)`)
2625

27-
const cwd = exercisesPath
28-
await execa({ cwd })`git config user.name "Roo Code"`
29-
await execa({ cwd })`git config user.email "[email protected]"`
30-
await execa({ cwd })`git checkout -f`
31-
await execa({ cwd })`git clean -fd`
32-
await execa({ cwd })`git checkout -b runs/${run.id}-${crypto.randomUUID().slice(0, 8)} main`
26+
const containerized = isDockerContainer()
27+
28+
if (!containerized) {
29+
await resetEvalsRepo({ run, cwd: exercisesPath })
30+
}
3331

3432
const heartbeat = await startHeartbeat(run.id)
3533
const queue = new PQueue({ concurrency: run.concurrency })
3634

3735
try {
38-
const containerize = isDockerContainer()
39-
4036
await queue.addAll(
4137
tasks
4238
.filter((task) => task.finishedAt === null)
43-
.map((task) => () => (containerize ? processTaskInContainer(task.id) : processTask(task.id))),
39+
.map((task) => () => (containerized ? processTaskInContainer(task.id) : processTask(task.id))),
4440
)
4541

4642
console.log(`[${Date.now()} | ${tag}] finishRun`)
4743
const result = await finishRun(run.id)
4844
console.log(`[${Date.now()} | ${tag}] result ->`, result)
4945

50-
await execa({ cwd: exercisesPath })`git add .`
51-
await execa({ cwd: exercisesPath })`git commit -m ${`Run #${run.id}`} --no-verify`
46+
// There's no need to commit the changes in the container since they
47+
// will be lost when the container is destroyed. I think we should
48+
// store the diffs in the database instead.
49+
if (!containerized) {
50+
await commitEvalsRepoChanges({ run, cwd: exercisesPath })
51+
}
5252
} finally {
5353
console.log(`[${Date.now()} | ${tag}] cleaning up`)
5454
stopHeartbeat(run.id, heartbeat)

packages/evals/src/cli/runTask.ts

Lines changed: 56 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,10 @@ import pWaitFor from "p-wait-for"
66
import { execa } from "execa"
77

88
import {
9+
type TaskEvent,
10+
TaskCommandName,
911
RooCodeEventName,
10-
IpcOrigin,
1112
IpcMessageType,
12-
TaskCommandName,
13-
type TaskEvent,
1413
EVALS_SETTINGS,
1514
EVALS_TIMEOUT,
1615
} from "@roo-code/types"
@@ -27,49 +26,38 @@ type RunTaskOptions = {
2726
publish: (taskEvent: TaskEvent) => Promise<void>
2827
}
2928

30-
export const runTask = async ({ run, task, publish }: RunTaskOptions): Promise<{ success: boolean }> => {
31-
const { language, exercise } = task
29+
export const runTask = async ({ run, task, publish }: RunTaskOptions) => {
3230
const tag = getTag("runTask", { run, task })
31+
const log = (message: string, ...args: unknown[]) => console.log(`[${Date.now()} | ${tag}] ${message}`, ...args)
32+
const logError = (message: string, ...args: unknown[]) =>
33+
console.error(`[${Date.now()} | ${tag}] ${message}`, ...args)
3334

35+
const { language, exercise } = task
3436
const prompt = fs.readFileSync(path.resolve(exercisesPath, `prompts/${language}.md`), "utf-8")
3537
const workspacePath = path.resolve(exercisesPath, language, exercise)
36-
const taskSocketPath = path.resolve(os.tmpdir(), `evals-${run.id}-${task.id}.sock`)
37-
38-
// Inject foot gun system prompt if present.
39-
if (process.env.FOOTGUN_SYSTEM_PROMPT) {
40-
const rooDir = path.join(workspacePath, ".roo")
41-
42-
if (!fs.existsSync(rooDir)) {
43-
fs.mkdirSync(rooDir, { recursive: true })
44-
}
45-
46-
fs.writeFileSync(path.join(rooDir, "system-prompt-code"), process.env.FOOTGUN_SYSTEM_PROMPT)
47-
}
48-
49-
console.log(`[${Date.now()} | ${tag}] Opening new VS Code window at ${workspacePath}`)
50-
38+
const ipcSocketPath = path.resolve(os.tmpdir(), `evals-${run.id}-${task.id}.sock`)
39+
const env = { ROO_CODE_IPC_SOCKET_PATH: ipcSocketPath }
5140
const controller = new AbortController()
5241
const cancelSignal = controller.signal
42+
const containerized = isDockerContainer()
5343

54-
const codeCommand = isDockerContainer()
55-
? `xvfb-run --auto-servernum --server-num=1 code --wait --log trace --disable-workspace-trust --disable-gpu --disable-lcd-text --no-sandbox --user-data-dir /roo/.vscode --password-store="basic"`
56-
: `code --disable-workspace-trust`
44+
const codeCommand = containerized
45+
? `xvfb-run --auto-servernum --server-num=1 code --wait --log trace --disable-workspace-trust --disable-gpu --disable-lcd-text --no-sandbox --user-data-dir /roo/.vscode --password-store="basic" -n ${workspacePath}`
46+
: `code --disable-workspace-trust -n ${workspacePath}`
5747

58-
console.log(`[${Date.now()} | ${tag}] ${codeCommand}`)
48+
log(codeCommand)
5949

60-
// Sleep for a random amount of time between 5 and 10 seconds.
61-
await new Promise((resolve) => setTimeout(resolve, Math.random() * 5_000 + 5_000))
50+
// Sleep for a random amount of time between 5 and 10 seconds, unless we're
51+
// running in a container, in which case there are no issues with flooding
52+
// VSCode with new windows.
53+
if (!containerized) {
54+
await new Promise((resolve) => setTimeout(resolve, Math.random() * 5_000 + 5_000))
55+
}
6256

63-
const subprocess = execa({
64-
env: {
65-
ROO_CODE_IPC_SOCKET_PATH: taskSocketPath,
66-
},
67-
shell: "/bin/bash",
68-
cancelSignal,
69-
})`${codeCommand} -n ${workspacePath}`
57+
const subprocess = execa({ env, shell: "/bin/bash", cancelSignal })`${codeCommand}`
7058

71-
// If debugging:
72-
subprocess.stdout.pipe(process.stdout)
59+
// If debugging, add `--verbose` to `codeCommand` and uncomment the following line.
60+
// subprocess.stdout.pipe(process.stdout)
7361

7462
// Give VSCode some time to spawn before connecting to its unix socket.
7563
await new Promise((resolve) => setTimeout(resolve, 3_000))
@@ -78,26 +66,20 @@ export const runTask = async ({ run, task, publish }: RunTaskOptions): Promise<{
7866

7967
while (true) {
8068
try {
81-
console.log(`[${Date.now()} | ${tag}] connecting to ${taskSocketPath}`)
82-
client = new IpcClient(taskSocketPath)
69+
client = new IpcClient(ipcSocketPath)
8370
await pWaitFor(() => client!.isReady, { interval: 250, timeout: 1_000 })
8471
break
8572
} catch (_error) {
86-
if (client) {
87-
client.disconnect()
88-
}
89-
73+
client?.disconnect()
9074
attempts--
9175

9276
if (attempts <= 0) {
93-
console.error(`[${Date.now()} | ${tag}] unable to connect`)
94-
return { success: false }
77+
logError(`unable to connect to IPC socket -> ${ipcSocketPath}`)
78+
return
9579
}
9680
}
9781
}
9882

99-
console.log(`[${Date.now()} | ${tag}] connected to ${taskSocketPath}`)
100-
10183
let taskStartedAt = Date.now()
10284
let taskFinishedAt: number | undefined
10385
let taskMetricsId: number | undefined
@@ -106,18 +88,24 @@ export const runTask = async ({ run, task, publish }: RunTaskOptions): Promise<{
10688

10789
const ignoreEvents: Record<"broadcast" | "log", RooCodeEventName[]> = {
10890
broadcast: [RooCodeEventName.Message],
109-
log: [RooCodeEventName.TaskTokenUsageUpdated], // [RooCodeEventName.Message, RooCodeEventName.TaskAskResponded],
91+
log: [RooCodeEventName.TaskTokenUsageUpdated, RooCodeEventName.TaskAskResponded],
11092
}
11193

11294
client.on(IpcMessageType.TaskEvent, async (taskEvent) => {
11395
const { eventName, payload } = taskEvent
11496

97+
// Publish all events except for these to Redis.
11598
if (!ignoreEvents.broadcast.includes(eventName)) {
11699
await publish({ ...taskEvent, taskId: task.id })
117100
}
118101

119-
if (!ignoreEvents.log.includes(eventName)) {
120-
console.log(`[${Date.now()} | ${tag}] ${eventName} ->`, payload)
102+
// Log all events except for these.
103+
// For message events we only log non-partial messages.
104+
if (
105+
!ignoreEvents.log.includes(eventName) &&
106+
(eventName !== RooCodeEventName.Message || payload[0].message.partial !== true)
107+
) {
108+
log(`${eventName} ->`, payload)
121109
}
122110

123111
if (eventName === RooCodeEventName.TaskStarted) {
@@ -177,77 +165,47 @@ export const runTask = async ({ run, task, publish }: RunTaskOptions): Promise<{
177165
})
178166

179167
client.on(IpcMessageType.Disconnect, async () => {
180-
console.log(`[${Date.now()} | ${tag}] disconnect`)
168+
log(`disconnected from IPC socket -> ${ipcSocketPath}`)
181169
isClientDisconnected = true
182170
})
183171

184-
if (client.isReady) {
185-
const configuration = {
186-
...EVALS_SETTINGS,
187-
...run.settings,
188-
openRouterApiKey: process.env.OPENROUTER_API_KEY,
189-
}
190-
191-
client.sendMessage({
192-
type: IpcMessageType.TaskCommand,
193-
origin: IpcOrigin.Client,
194-
clientId: client.clientId!,
195-
data: {
196-
commandName: TaskCommandName.StartNewTask,
197-
data: {
198-
configuration,
199-
text: prompt,
200-
newTab: true,
201-
},
172+
client.sendCommand({
173+
commandName: TaskCommandName.StartNewTask,
174+
data: {
175+
configuration: {
176+
...EVALS_SETTINGS,
177+
...run.settings,
178+
openRouterApiKey: process.env.OPENROUTER_API_KEY,
202179
},
203-
})
204-
} else {
205-
console.error(`[${Date.now()} | ${tag}] unable to connect`)
206-
client.disconnect()
207-
taskFinishedAt = Date.now()
208-
isClientDisconnected = true
209-
}
180+
text: prompt,
181+
newTab: true,
182+
},
183+
})
210184

211185
try {
212186
await pWaitFor(() => !!taskFinishedAt || isClientDisconnected, { interval: 1_000, timeout: EVALS_TIMEOUT })
213-
// eslint-disable-next-line @typescript-eslint/no-unused-vars
214-
} catch (error) {
215-
console.log(`[${Date.now()} | ${tag}] time limit reached`)
187+
} catch (_error) {
188+
logError("time limit reached")
216189

217-
// Cancel the task.
218190
if (rooTaskId && !isClientDisconnected) {
219-
client.sendMessage({
220-
type: IpcMessageType.TaskCommand,
221-
origin: IpcOrigin.Client,
222-
clientId: client.clientId!,
223-
data: { commandName: TaskCommandName.CancelTask, data: rooTaskId },
224-
})
225-
226-
// Allow some time for the task to cancel.
227-
await new Promise((resolve) => setTimeout(resolve, 5_000))
191+
log("cancelling task")
192+
client.sendCommand({ commandName: TaskCommandName.CancelTask, data: rooTaskId })
193+
await new Promise((resolve) => setTimeout(resolve, 5_000)) // Allow some time for the task to cancel.
228194
}
229195

230196
await updateTask(task.id, { finishedAt: new Date() })
231197
}
232198

233199
if (!isClientDisconnected) {
234200
if (rooTaskId) {
235-
client.sendMessage({
236-
type: IpcMessageType.TaskCommand,
237-
origin: IpcOrigin.Client,
238-
clientId: client.clientId!,
239-
data: { commandName: TaskCommandName.CloseTask, data: rooTaskId },
240-
})
241-
242-
// Allow some time for the window to close.
243-
await new Promise((resolve) => setTimeout(resolve, 2_000))
201+
log("closing task")
202+
client.sendCommand({ commandName: TaskCommandName.CloseTask, data: rooTaskId })
203+
await new Promise((resolve) => setTimeout(resolve, 2_000)) // Allow some time for the window to close.
244204
}
245205

246206
client.disconnect()
247207
}
248208

249209
controller.abort()
250210
await subprocess
251-
252-
return { success: !!taskFinishedAt }
253211
}

0 commit comments

Comments
 (0)