Skip to content

Commit cd78de7

Browse files
committed
More progress
1 parent 9cf659f commit cd78de7

File tree

3 files changed

+52
-14
lines changed

3 files changed

+52
-14
lines changed

benchmark/apps/cli/src/index.ts

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,12 @@ import {
2121
findRun,
2222
createRun,
2323
finishRun,
24+
type Task,
2425
createTask,
25-
Task,
2626
getTasks,
2727
updateTask,
2828
createTaskMetrics,
29+
updateTaskMetrics,
2930
} from "@benchmark/db"
3031
import { inChunksOf } from "@benchmark/lib"
3132
import { IpcServer, IpcClient } from "@benchmark/ipc"
@@ -62,7 +63,7 @@ const run = async (toolbox: GluegunToolbox) => {
6263
run = await findRun(id)
6364
} else {
6465
run = await createRun({
65-
model: "anthropic/claude-3.7-sonnet",
66+
model: rooCodeDefaults.openRouterModelId!,
6667
pid: process.pid,
6768
socketPath: path.resolve(os.tmpdir(), `benchmark-${crypto.randomUUID()}.sock`),
6869
})
@@ -103,7 +104,7 @@ const run = async (toolbox: GluegunToolbox) => {
103104
})
104105
})
105106

106-
const chunks = inChunksOf(tasks, 2)
107+
const chunks = inChunksOf(tasks, 3)
107108

108109
for (const chunk of chunks) {
109110
await Promise.all(
@@ -144,7 +145,7 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
144145

145146
while (++tries < 5) {
146147
try {
147-
await pWaitFor(() => client.isConnected, { interval: 100, timeout: 2_000 })
148+
await pWaitFor(() => client.isReady, { interval: 100, timeout: 2_000 })
148149
break
149150
} catch (error) {
150151
console.error(error)
@@ -153,6 +154,12 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
153154
}
154155
}
155156

157+
if (!client.isReady) {
158+
client.disconnect()
159+
console.log(`[cli#runExercise | ${language} / ${exercise}] unable to connect`)
160+
return false
161+
}
162+
156163
let isTaskFinished = false
157164
let isTaskAborted = false
158165

@@ -168,6 +175,7 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
168175
]
169176

170177
let taskStartedAt = Date.now()
178+
let taskMetricsId: number | undefined
171179

172180
client.on(IpcMessageType.TaskEvent, async (taskEvent) => {
173181
const { eventName, payload } = taskEvent
@@ -193,16 +201,33 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
193201

194202
if (eventName === RooCodeEventName.TaskStarted) {
195203
taskStartedAt = Date.now()
196-
await updateTask(task.id, { startedAt: new Date() })
204+
205+
const taskMetrics = await createTaskMetrics({
206+
cost: 0,
207+
tokensIn: 0,
208+
tokensOut: 0,
209+
tokensContext: 0,
210+
duration: 0,
211+
cacheWrites: 0,
212+
cacheReads: 0,
213+
})
214+
215+
await updateTask(task.id, { taskMetricsId: taskMetrics.id, startedAt: new Date() })
216+
217+
taskStartedAt = Date.now()
218+
taskMetricsId = taskMetrics.id
197219
}
198220

199-
if (eventName === RooCodeEventName.TaskCompleted) {
221+
if (
222+
(eventName === RooCodeEventName.TaskTokenUsageUpdated || eventName === RooCodeEventName.TaskCompleted) &&
223+
taskMetricsId
224+
) {
200225
const duration = Date.now() - taskStartedAt
201226

202227
const { totalCost, totalTokensIn, totalTokensOut, contextTokens, totalCacheWrites, totalCacheReads } =
203228
payload[1]
204229

205-
const taskMetrics = await createTaskMetrics({
230+
await updateTaskMetrics(taskMetricsId, {
206231
cost: totalCost,
207232
tokensIn: totalTokensIn,
208233
tokensOut: totalTokensOut,
@@ -211,8 +236,10 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
211236
cacheWrites: totalCacheWrites ?? 0,
212237
cacheReads: totalCacheReads ?? 0,
213238
})
239+
}
214240

215-
await updateTask(task.id, { taskMetricsId: taskMetrics.id, finishedAt: new Date() })
241+
if (eventName === RooCodeEventName.TaskCompleted) {
242+
await updateTask(task.id, { finishedAt: new Date() })
216243
isTaskFinished = true
217244
}
218245

@@ -239,12 +266,13 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
239266
},
240267
})
241268

242-
console.log(`[cli#runExercise | ${language} / ${exercise}] StartNewTask`)
269+
console.log(`[cli#runExercise | ${language} / ${exercise}] starting task`)
243270

244271
try {
245-
await pWaitFor(() => isTaskFinished || isTaskAborted, { interval: 1_000, timeout: 300 * 1_000 })
272+
await pWaitFor(() => isTaskFinished || isTaskAborted, { interval: 1_000, timeout: 1 * 60 * 1_000 })
273+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
246274
} catch (error) {
247-
console.error(error)
275+
console.log(`[cli#runExercise | ${language} / ${exercise}] time limit reached`)
248276
}
249277

250278
try {
@@ -255,8 +283,6 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
255283
data: "workbench.action.closeWindow",
256284
})
257285

258-
console.log(`[cli#runExercise | ${language} / ${exercise}] VSCodeCommand (workbench.action.closeWindow)`)
259-
260286
client.disconnect()
261287
} catch (error) {
262288
console.error(error)

benchmark/packages/db/src/queries/taskMetrics.ts

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { eq } from "drizzle-orm"
22

33
import { RecordNotFoundError, RecordNotCreatedError } from "./errors.js"
4-
import type { InsertTaskMetrics } from "../schema.js"
4+
import type { InsertTaskMetrics, UpdateTaskMetrics } from "../schema.js"
55
import { insertTaskMetricsSchema, taskMetrics } from "../schema.js"
66
import { db } from "../db.js"
77

@@ -34,3 +34,14 @@ export const createTaskMetrics = async (args: InsertTaskMetrics) => {
3434

3535
return record
3636
}
37+
38+
/**
 * Updates the task-metrics row whose id matches `id` with the given values
 * and returns the updated record.
 *
 * @param id - Primary key of the row to update.
 * @param values - Column values to write (shape defined by `UpdateTaskMetrics` in ../schema.js).
 * @returns The first record returned by the update statement.
 * @throws RecordNotFoundError when the update returns no rows.
 */
export const updateTaskMetrics = async (id: number, values: UpdateTaskMetrics) => {
39+
// NOTE(review): `table` is declared outside this hunk — presumably an alias for the `taskMetrics` schema table; confirm.
const records = await db.update(table).set(values).where(eq(table.id, id)).returning()
40+
// Only the first returned row is used; it is undefined when nothing was returned.
const record = records[0]
41+
42+
if (!record) {
43+
throw new RecordNotFoundError()
44+
}
45+
46+
return record
47+
}

benchmark/packages/db/src/queries/tasks.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ export const updateTask = async (id: number, values: UpdateTask) => {
4747

4848
return record
4949
}
50+
5051
type GetTask = {
5152
runId: number
5253
language: ExerciseLanguage

0 commit comments

Comments
 (0)