@@ -170,6 +170,20 @@ def update_progress(stage: str, progress: float):
170170 jobs [job_id ]["message" ] = error_msg
171171 jobs [job_id ]["completed_at" ] = datetime .utcnow ().isoformat ()
172172
173+ finally :
174+ # Cleanup: delete uploaded files after processing (success or failure)
175+ cleanup_count = 0
176+ for filename in filenames :
177+ file_path = collection_dir / filename
178+ try :
179+ if file_path .exists ():
180+ file_path .unlink ()
181+ cleanup_count += 1
182+ except Exception as cleanup_err :
183+ logger .warning (f"[{ job_id } ] Failed to cleanup { file_path } : { cleanup_err } " )
184+
185+ logger .info (f"[{ job_id } ] Cleanup: removed { cleanup_count } /{ len (filenames )} uploaded files" )
186+
173187
174188@router .post ("/upload" )
175189async def upload_file (
@@ -326,14 +340,15 @@ def run_zip_indexing(job_id: str, zip_path: Path, collection_dir: Path, collecti
326340 """
327341 logger .info (f"[{ job_id } ] Starting ZIP extraction from { zip_path } " )
328342
343+ extracted_files = [] # Track files for cleanup in finally block
344+
329345 try :
330346 jobs [job_id ]["status" ] = "running"
331347 jobs [job_id ]["stage" ] = "extracting_zip"
332348 jobs [job_id ]["message" ] = "Extracting ZIP archive"
333349 jobs [job_id ]["progress" ] = 0.05
334350
335351 # Extract ZIP, filtering out macOS metadata and hidden files
336- extracted_files = []
337352 with zipfile .ZipFile (zip_path , 'r' ) as zf :
338353 for name in zf .namelist ():
339354 # Skip macOS metadata, hidden files, and directories
@@ -403,18 +418,42 @@ def update_progress(stage: str, progress: float):
403418 logger .error (f"[{ job_id } ] ZIP indexing FAILED: { error_msg } " )
404419 logger .error (f"[{ job_id } ] Stack trace:\n { traceback .format_exc ()} " )
405420
406- # Cleanup ZIP if still exists
407- if zip_path .exists ():
408- try :
409- zip_path .unlink ()
410- except Exception :
411- pass
412-
413421 jobs [job_id ]["status" ] = "failed"
414422 jobs [job_id ]["stage" ] = "failed"
415423 jobs [job_id ]["message" ] = error_msg
416424 jobs [job_id ]["completed_at" ] = datetime .utcnow ().isoformat ()
417425
426+ finally :
427+ # Cleanup: delete the ZIP if it still exists
428+ if zip_path .exists ():
429+ try :
430+ zip_path .unlink ()
431+ logger .debug (f"[{ job_id } ] Cleanup: removed ZIP file" )
432+ except Exception as cleanup_err :
433+ logger .warning (f"[{ job_id } ] Failed to cleanup ZIP { zip_path } : { cleanup_err } " )
434+
435+ # Cleanup: delete extracted files after processing (success or failure)
436+ cleanup_count = 0
437+ for filename in extracted_files :
438+ file_path = collection_dir / filename
439+ try :
440+ if file_path .exists ():
441+ file_path .unlink ()
442+ cleanup_count += 1
443+ # Also try to remove parent dirs if empty (for nested paths)
444+ parent = file_path .parent
445+ while parent != collection_dir :
446+ try :
447+ parent .rmdir () # Only removes if empty
448+ parent = parent .parent
449+ except OSError :
450+ break
451+ except Exception as cleanup_err :
452+ logger .warning (f"[{ job_id } ] Failed to cleanup { file_path } : { cleanup_err } " )
453+
454+ if extracted_files :
455+ logger .info (f"[{ job_id } ] Cleanup: removed { cleanup_count } /{ len (extracted_files )} extracted files" )
456+
418457
419458@router .post ("/upload-zip" )
420459async def upload_zip (
0 commit comments