@@ -21,11 +21,12 @@ import {
2121 findRun ,
2222 createRun ,
2323 finishRun ,
24+ type Task ,
2425 createTask ,
25- Task ,
2626 getTasks ,
2727 updateTask ,
2828 createTaskMetrics ,
29+ updateTaskMetrics ,
2930} from "@benchmark/db"
3031import { inChunksOf } from "@benchmark/lib"
3132import { IpcServer , IpcClient } from "@benchmark/ipc"
@@ -62,7 +63,7 @@ const run = async (toolbox: GluegunToolbox) => {
6263 run = await findRun ( id )
6364 } else {
6465 run = await createRun ( {
65- model : "anthropic/claude-3.7-sonnet" ,
66+ model : rooCodeDefaults . openRouterModelId ! ,
6667 pid : process . pid ,
6768 socketPath : path . resolve ( os . tmpdir ( ) , `benchmark-${ crypto . randomUUID ( ) } .sock` ) ,
6869 } )
@@ -103,7 +104,7 @@ const run = async (toolbox: GluegunToolbox) => {
103104 } )
104105 } )
105106
106- const chunks = inChunksOf ( tasks , 2 )
107+ const chunks = inChunksOf ( tasks , 3 )
107108
108109 for ( const chunk of chunks ) {
109110 await Promise . all (
@@ -144,7 +145,7 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
144145
145146 while ( ++ tries < 5 ) {
146147 try {
147- await pWaitFor ( ( ) => client . isConnected , { interval : 100 , timeout : 2_000 } )
148+ await pWaitFor ( ( ) => client . isReady , { interval : 100 , timeout : 2_000 } )
148149 break
149150 } catch ( error ) {
150151 console . error ( error )
@@ -153,6 +154,12 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
153154 }
154155 }
155156
157+ if ( ! client . isReady ) {
158+ client . disconnect ( )
159+ console . log ( `[cli#runExercise | ${ language } / ${ exercise } ] unable to connect` )
160+ return false
161+ }
162+
156163 let isTaskFinished = false
157164 let isTaskAborted = false
158165
@@ -168,6 +175,7 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
168175 ]
169176
170177 let taskStartedAt = Date . now ( )
178+ let taskMetricsId : number | undefined
171179
172180 client . on ( IpcMessageType . TaskEvent , async ( taskEvent ) => {
173181 const { eventName, payload } = taskEvent
@@ -193,16 +201,33 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
193201
194202 if ( eventName === RooCodeEventName . TaskStarted ) {
195203 taskStartedAt = Date . now ( )
196- await updateTask ( task . id , { startedAt : new Date ( ) } )
204+
205+ const taskMetrics = await createTaskMetrics ( {
206+ cost : 0 ,
207+ tokensIn : 0 ,
208+ tokensOut : 0 ,
209+ tokensContext : 0 ,
210+ duration : 0 ,
211+ cacheWrites : 0 ,
212+ cacheReads : 0 ,
213+ } )
214+
215+ await updateTask ( task . id , { taskMetricsId : taskMetrics . id , startedAt : new Date ( ) } )
216+
217+ taskStartedAt = Date . now ( )
218+ taskMetricsId = taskMetrics . id
197219 }
198220
199- if ( eventName === RooCodeEventName . TaskCompleted ) {
221+ if (
222+ ( eventName === RooCodeEventName . TaskTokenUsageUpdated || eventName === RooCodeEventName . TaskCompleted ) &&
223+ taskMetricsId
224+ ) {
200225 const duration = Date . now ( ) - taskStartedAt
201226
202227 const { totalCost, totalTokensIn, totalTokensOut, contextTokens, totalCacheWrites, totalCacheReads } =
203228 payload [ 1 ]
204229
205- const taskMetrics = await createTaskMetrics ( {
230+ await updateTaskMetrics ( taskMetricsId , {
206231 cost : totalCost ,
207232 tokensIn : totalTokensIn ,
208233 tokensOut : totalTokensOut ,
@@ -211,8 +236,10 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
211236 cacheWrites : totalCacheWrites ?? 0 ,
212237 cacheReads : totalCacheReads ?? 0 ,
213238 } )
239+ }
214240
215- await updateTask ( task . id , { taskMetricsId : taskMetrics . id , finishedAt : new Date ( ) } )
241+ if ( eventName === RooCodeEventName . TaskCompleted ) {
242+ await updateTask ( task . id , { finishedAt : new Date ( ) } )
216243 isTaskFinished = true
217244 }
218245
@@ -239,12 +266,13 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
239266 } ,
240267 } )
241268
242- console . log ( `[cli#runExercise | ${ language } / ${ exercise } ] StartNewTask ` )
269+ console . log ( `[cli#runExercise | ${ language } / ${ exercise } ] starting task ` )
243270
244271 try {
245- await pWaitFor ( ( ) => isTaskFinished || isTaskAborted , { interval : 1_000 , timeout : 300 * 1_000 } )
272+ await pWaitFor ( ( ) => isTaskFinished || isTaskAborted , { interval : 1_000 , timeout : 1 * 60 * 1_000 } )
273+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
246274 } catch ( error ) {
247- console . error ( error )
275+ console . log ( `[cli#runExercise | ${ language } / ${ exercise } ] time limit reached` )
248276 }
249277
250278 try {
@@ -255,8 +283,6 @@ const runExercise = async ({ run, task, server }: { run: Run; task: Task; server
255283 data : "workbench.action.closeWindow" ,
256284 } )
257285
258- console . log ( `[cli#runExercise | ${ language } / ${ exercise } ] VSCodeCommand (workbench.action.closeWindow)` )
259-
260286 client . disconnect ( )
261287 } catch ( error ) {
262288 console . error ( error )
0 commit comments