1+ import { execa } from "execa"
2+
13import { RooCodeEventName , type TaskEvent } from "@roo-code/types"
24
35import { findTask , updateTask , findRun } from "../db/index.js"
46
57import { getTag } from "./utils.js"
8+ import { FileLogger } from "./FileLogger.js"
69import { redisClient , getPubSubKey , registerRunner , deregisterRunner } from "./redis.js"
710import { runTask } from "./runTask.js"
811import { runUnitTest } from "./runUnitTest.js"
9- import { execa } from "execa"
1012
11- export const processTask = async ( taskId : number ) => {
13+ export const processTask = async ( { taskId, logger } : { taskId : number ; logger ?: FileLogger } ) => {
1214 const task = await findTask ( taskId )
15+ const { language, exercise } = task
1316 const run = await findRun ( task . runId )
1417 await registerRunner ( { runId : run . id , taskId } )
1518
16- try {
17- const tag = getTag ( "processTask" , { run, task } )
19+ logger =
20+ logger ||
21+ new FileLogger ( {
22+ logDir : `/var/log/evals/runs/${ run . id } ` ,
23+ filename : `${ language } -${ exercise } .log` ,
24+ tag : getTag ( "runTask" , { run, task } ) ,
25+ } )
1826
27+ try {
1928 const publish = async ( e : TaskEvent ) => {
2029 const redis = await redisClient ( )
2130 await redis . publish ( getPubSubKey ( run . id ) , JSON . stringify ( e ) )
2231 }
2332
24- console . log ( `[ ${ Date . now ( ) } | ${ tag } ] running task ${ task . id } (${ task . language } /${ task . exercise } )...`)
25- await runTask ( { run, task, publish } )
33+ logger . info ( ` running task ${ task . id } (${ language } /${ exercise } )...`)
34+ await runTask ( { run, task, publish, logger } )
2635
27- console . log ( `[ ${ Date . now ( ) } | ${ tag } ] testing task ${ task . id } (${ task . language } /${ task . exercise } )...`)
36+ logger . info ( ` testing task ${ task . id } (${ language } /${ exercise } )...`)
2837 const passed = await runUnitTest ( { run, task } )
2938
30- console . log ( `[ ${ Date . now ( ) } | ${ tag } ] task ${ task . id } (${ task . language } /${ task . exercise } ) -> ${ passed } `)
39+ logger . info ( ` task ${ task . id } (${ language } /${ exercise } ) -> ${ passed } `)
3140 await updateTask ( task . id , { passed } )
3241
3342 await publish ( {
@@ -39,18 +48,65 @@ export const processTask = async (taskId: number) => {
3948 }
4049}
4150
/**
 * Runs the evals CLI for a single task inside an `evals-runner` Docker
 * container, retrying failed runs with jittered exponential backoff.
 *
 * Each attempt uses a distinct container name (`evals-task-<taskId>.<attempt>`)
 * so a retry never collides with the name used by a previous attempt.
 *
 * The returned promise resolves as soon as one attempt exits successfully. It
 * also resolves (after logging an error) when every attempt has failed; the
 * failure is NOT propagated to the caller.
 *
 * @param taskId - Id of the task, forwarded to the CLI via `--taskId`.
 * @param logger - Receives progress and failure messages. Container
 *   stdout/stderr are not forwarded to it; only exit codes/errors are logged.
 * @param maxRetries - Number of retries after the first attempt (default 10),
 *   i.e. up to `maxRetries + 1` container runs. Negative or NaN values are
 *   treated as 0 so that at least one attempt is always made.
 */
export const processTaskInContainer = async ({
	taskId,
	logger,
	maxRetries = 10,
}: {
	taskId: number
	logger: FileLogger
	maxRetries?: number
}) => {
	const baseArgs = [
		"--rm",
		"--network evals_default",
		"-v /var/run/docker.sock:/var/run/docker.sock",
		"-v /tmp/evals:/var/log/evals",
		"-e HOST_EXECUTION_METHOD=docker",
	]

	// Guard against nonsensical retry counts: always run at least once, and keep
	// the "attempt x/y" log output integral.
	const retries = Number.isNaN(maxRetries) ? 0 : Math.max(0, Math.floor(maxRetries))
	const totalAttempts = retries + 1

	const command = `pnpm --filter @roo-code/evals cli --taskId ${taskId}`
	logger.info(command)

	for (let attempt = 0; attempt <= retries; attempt++) {
		const containerName = `evals-task-${taskId}.${attempt}`
		const args = [`--name ${containerName}`, ...baseArgs]
		const isRetry = attempt > 0
		const progress = `attempt ${attempt + 1}/${totalAttempts}`

		if (isRetry) {
			// Exponential backoff (1s, 2s, 4s, ...) scaled by a random factor in
			// [0.5, 1.5) so concurrent runners do not retry in lockstep.
			const delayMs = Math.round(Math.pow(2, attempt - 1) * 1000 * (0.5 + Math.random()))
			logger.info(`retrying in ${delayMs}ms (${progress})`)
			await new Promise((resolve) => setTimeout(resolve, delayMs))
		}

		logger.info(`${isRetry ? "retrying" : "executing"} container command (${progress})`)

		try {
			const result = await execa(`docker run ${args.join(" ")} evals-runner sh -c "${command}"`, {
				shell: true,
			})

			logger.info(`container process completed with exit code: ${result.exitCode}`)
			return
		} catch (error) {
			// Prefer the exit code when the rejection carries one; anything else
			// is logged via its string form. Either way, fall through to the
			// next attempt (the loop condition ends the retries).
			if (error && typeof error === "object" && "exitCode" in error) {
				logger.error(`container process failed with exit code: ${error.exitCode} (${progress})`)
			} else {
				logger.error(`container process failed with error: ${error} (${progress})`)
			}
		}
	}

	logger.error(`all ${totalAttempts} attempts failed, giving up`)

	// TODO: Mark task as failed.
}
0 commit comments