@@ -132,12 +132,27 @@ def _resume_from_state(self):
132132 self .pending_jobs = []
133133 for job_data in state .pending_jobs :
134134 job = Job .from_dict (job_data )
135- self .pending_jobs .append (job )
135+ # Check if file exists (if job has a file)
136+ if job .file and not job .file .exists ():
137+ logger .error (f"File not found for job { job .id } : { job .file } " )
138+ logger .error ("This may happen if files were in temporary directories that were cleaned up" )
139+ self .failed_jobs [job .id ] = f"File not found: { job .file } "
140+ else :
141+ self .pending_jobs .append (job )
136142
137- # Restore completed results
138- for result_data in state .completed_results :
139- result = JobResult .from_dict (result_data )
140- self .completed_results [result .job_id ] = result
143+ # Restore completed results from file references
144+ for result_ref in state .completed_results :
145+ job_id = result_ref ["job_id" ]
146+ file_path = result_ref ["file_path" ]
147+ try :
148+ with open (file_path , 'r' ) as f :
149+ result_data = json .load (f )
150+ result = JobResult .from_dict (result_data )
151+ self .completed_results [job_id ] = result
152+ except Exception as e :
153+ logger .error (f"Failed to load result for { job_id } from { file_path } : { e } " )
154+ # Move to failed jobs if we can't load the result
155+ self .failed_jobs [job_id ] = f"Failed to load result file: { e } "
141156
142157 # Restore failed jobs
143158 for job_data in state .failed_jobs :
@@ -162,7 +177,10 @@ def to_json(self) -> Dict:
162177 return {
163178 "created_at" : datetime .now ().isoformat (),
164179 "pending_jobs" : [job .to_dict () for job in self .pending_jobs ],
165- "completed_results" : [result .to_dict () for result in self .completed_results .values ()],
180+ "completed_results" : [
181+ {"job_id" : job_id , "file_path" : str (self .results_dir / f"{ job_id } .json" )}
182+ for job_id in self .completed_results .keys ()
183+ ],
166184 "failed_jobs" : [
167185 {
168186 "id" : job_id ,
@@ -505,10 +523,15 @@ def _update_batch_results(self, batch_result: Dict):
505523 self .failed_jobs [result .job_id ] = error_message
506524 self ._save_result_to_file (result )
507525 logger .error (f"✗ Job { result .job_id } failed: { result .error } " )
526+
527+ # Remove completed/failed job from pending
528+ self .pending_jobs = [job for job in self .pending_jobs if job .id != result .job_id ]
508529
509530 # Update failed jobs
510531 for job_id , error in failed .items ():
511532 self .failed_jobs [job_id ] = error
533+ # Remove failed job from pending
534+ self .pending_jobs = [job for job in self .pending_jobs if job .id != job_id ]
512535 logger .error (f"✗ Job { job_id } failed: { error } " )
513536
514537 # Update batch tracking
0 commit comments