@@ -643,10 +643,9 @@ Examples:
643643 },
644644}
645645
646-
647646// evalRunSingle executes a single eval run for one agent/model/reasoning combination.
648647// It handles output directory creation, task execution, aggregation, and output file writing.
649- func evalRunSingle (
648+ func evalRunSingle ( //nolint:gocognit,gocyclo,maintidx
650649 interruptCtx context.Context ,
651650 spec RunSpec ,
652651 shared SharedConfig ,
@@ -678,45 +677,11 @@ func evalRunSingle(
678677 // For resume mode: filter out completed tasks and clean incomplete dirs.
679678 totalTaskCount := len (allTasks )
680679 if isResuming && runCfg != nil {
681- // Build task map for ordering from run config.
682- taskMap := make (map [string ]* task.Task )
683- for _ , t := range allTasks {
684- taskMap [string (t .Language )+ "/" + t .Slug ] = t
685- }
686-
687- // Restore original task order from run config.
688- var orderedTasks []* task.Task
689- var missingTasks []string
690- for _ , slug := range runCfg .TaskList {
691- if t , ok := taskMap [slug ]; ok {
692- orderedTasks = append (orderedTasks , t )
693- } else {
694- missingTasks = append (missingTasks , slug )
695- }
696- }
697- if len (missingTasks ) > 0 {
698- logger .Warn ("some tasks from original run not found in current build" ,
699- "missing" , missingTasks ,
700- "count" , len (missingTasks ))
701- fmt .Printf (" Warning: %d task(s) from original run not found: %v\n " ,
702- len (missingTasks ), missingTasks )
703- }
704- allTasks = orderedTasks
705-
706- // Clean up incomplete task directories.
707- if err := cleanIncompleteTaskDirs (outputDir , completedTasks , allTasks ); err != nil {
708- return nil , nil , fmt .Errorf ("cleaning incomplete tasks: %w" , err )
709- }
710-
711- // Filter out completed tasks.
712- tasksToRun = nil
713- for _ , t := range allTasks {
714- taskSlug := string (t .Language ) + "/" + t .Slug
715- if ! completedTasks [taskSlug ] {
716- tasksToRun = append (tasksToRun , t )
717- }
680+ var err error
681+ allTasks , tasksToRun , err = prepareResumedTasks (allTasks , runCfg , outputDir , completedTasks )
682+ if err != nil {
683+ return nil , nil , err
718684 }
719-
720685 if len (tasksToRun ) == 0 {
721686 fmt .Println ("\n All tasks already completed. Nothing to resume." )
722687 return nil , nil , nil
@@ -774,7 +739,7 @@ func evalRunSingle(
774739 parallel = 1
775740 }
776741
777- if parallel == 1 {
742+ if parallel == 1 { //nolint:nestif // Sequential execution loop with deeply interleaved interrupt/quota/progress handling.
778743 consecutiveQuotaExhausted := 0
779744 for i , t := range tasksToRun {
780745 // Check for interrupt before starting next task.
@@ -2802,6 +2767,55 @@ func loadPreviousAttestation(outputDir string) (*EvalAttestation, error) {
28022767 return & attestation , nil
28032768}
28042769
2770+ // prepareResumedTasks restores task order from the run config, cleans incomplete
2771+ // directories, and filters out already-completed tasks for a resumed eval run.
2772+ func prepareResumedTasks (
2773+ allTasks []* task.Task ,
2774+ runCfg * RunConfig ,
2775+ outputDir string ,
2776+ completedTasks map [string ]bool ,
2777+ ) ([]* task.Task , []* task.Task , error ) {
2778+ // Build task map for ordering from run config.
2779+ taskMap := make (map [string ]* task.Task )
2780+ for _ , t := range allTasks {
2781+ taskMap [string (t .Language )+ "/" + t .Slug ] = t
2782+ }
2783+
2784+ // Restore original task order from run config.
2785+ var orderedTasks []* task.Task
2786+ var missingTasks []string
2787+ for _ , slug := range runCfg .TaskList {
2788+ if t , ok := taskMap [slug ]; ok {
2789+ orderedTasks = append (orderedTasks , t )
2790+ } else {
2791+ missingTasks = append (missingTasks , slug )
2792+ }
2793+ }
2794+ if len (missingTasks ) > 0 {
2795+ logger .Warn ("some tasks from original run not found in current build" ,
2796+ "missing" , missingTasks ,
2797+ "count" , len (missingTasks ))
2798+ fmt .Printf (" Warning: %d task(s) from original run not found: %v\n " ,
2799+ len (missingTasks ), missingTasks )
2800+ }
2801+
2802+ // Clean up incomplete task directories.
2803+ if err := cleanIncompleteTaskDirs (outputDir , completedTasks , orderedTasks ); err != nil {
2804+ return nil , nil , fmt .Errorf ("cleaning incomplete tasks: %w" , err )
2805+ }
2806+
2807+ // Filter out completed tasks.
2808+ var tasksToRun []* task.Task
2809+ for _ , t := range orderedTasks {
2810+ taskSlug := string (t .Language ) + "/" + t .Slug
2811+ if ! completedTasks [taskSlug ] {
2812+ tasksToRun = append (tasksToRun , t )
2813+ }
2814+ }
2815+
2816+ return orderedTasks , tasksToRun , nil
2817+ }
2818+
28052819// cleanIncompleteTaskDirs removes task directories that don't have validation.log.
28062820func cleanIncompleteTaskDirs (outputDir string , completed map [string ]bool , allTasks []* task.Task ) error {
28072821 for _ , t := range allTasks {
0 commit comments