Skip to content

Commit 6dadf25

Browse files
fix(api): cleanup file garantito tramite finally block dopo upload su Qdrant o fallimento pipeline
1 parent e6c0698 commit 6dadf25

File tree

2 files changed

+50
-8
lines changed

2 files changed

+50
-8
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ and the project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.
77

88
## [Unreleased]
99

10+
### Fixed
11+
- File cleanup is now guaranteed via a `finally` block: uploaded files are deleted after the Qdrant upload completes or when the pipeline fails
12+
1013
## [1.2.0] - 2025-12-02
1114

1215
### Added

api/routes/upload.py

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,20 @@ def update_progress(stage: str, progress: float):
170170
jobs[job_id]["message"] = error_msg
171171
jobs[job_id]["completed_at"] = datetime.utcnow().isoformat()
172172

173+
finally:
174+
# Cleanup: delete uploaded files after processing (success or failure)
175+
cleanup_count = 0
176+
for filename in filenames:
177+
file_path = collection_dir / filename
178+
try:
179+
if file_path.exists():
180+
file_path.unlink()
181+
cleanup_count += 1
182+
except Exception as cleanup_err:
183+
logger.warning(f"[{job_id}] Failed to cleanup {file_path}: {cleanup_err}")
184+
185+
logger.info(f"[{job_id}] Cleanup: removed {cleanup_count}/{len(filenames)} uploaded files")
186+
173187

174188
@router.post("/upload")
175189
async def upload_file(
@@ -326,14 +340,15 @@ def run_zip_indexing(job_id: str, zip_path: Path, collection_dir: Path, collecti
326340
"""
327341
logger.info(f"[{job_id}] Starting ZIP extraction from {zip_path}")
328342

343+
extracted_files = [] # Track files for cleanup in finally block
344+
329345
try:
330346
jobs[job_id]["status"] = "running"
331347
jobs[job_id]["stage"] = "extracting_zip"
332348
jobs[job_id]["message"] = "Extracting ZIP archive"
333349
jobs[job_id]["progress"] = 0.05
334350

335351
# Extract ZIP, filtering out macOS metadata and hidden files
336-
extracted_files = []
337352
with zipfile.ZipFile(zip_path, 'r') as zf:
338353
for name in zf.namelist():
339354
# Skip macOS metadata, hidden files, and directories
@@ -403,18 +418,42 @@ def update_progress(stage: str, progress: float):
403418
logger.error(f"[{job_id}] ZIP indexing FAILED: {error_msg}")
404419
logger.error(f"[{job_id}] Stack trace:\n{traceback.format_exc()}")
405420

406-
# Cleanup ZIP if still exists
407-
if zip_path.exists():
408-
try:
409-
zip_path.unlink()
410-
except Exception:
411-
pass
412-
413421
jobs[job_id]["status"] = "failed"
414422
jobs[job_id]["stage"] = "failed"
415423
jobs[job_id]["message"] = error_msg
416424
jobs[job_id]["completed_at"] = datetime.utcnow().isoformat()
417425

426+
finally:
427+
# Cleanup: delete ZIP if still exists
428+
if zip_path.exists():
429+
try:
430+
zip_path.unlink()
431+
logger.debug(f"[{job_id}] Cleanup: removed ZIP file")
432+
except Exception as cleanup_err:
433+
logger.warning(f"[{job_id}] Failed to cleanup ZIP {zip_path}: {cleanup_err}")
434+
435+
# Cleanup: delete extracted files after processing (success or failure)
436+
cleanup_count = 0
437+
for filename in extracted_files:
438+
file_path = collection_dir / filename
439+
try:
440+
if file_path.exists():
441+
file_path.unlink()
442+
cleanup_count += 1
443+
# Also try to remove parent dirs if empty (for nested paths)
444+
parent = file_path.parent
445+
while parent != collection_dir:
446+
try:
447+
parent.rmdir() # Only removes if empty
448+
parent = parent.parent
449+
except OSError:
450+
break
451+
except Exception as cleanup_err:
452+
logger.warning(f"[{job_id}] Failed to cleanup {file_path}: {cleanup_err}")
453+
454+
if extracted_files:
455+
logger.info(f"[{job_id}] Cleanup: removed {cleanup_count}/{len(extracted_files)} extracted files")
456+
418457

419458
@router.post("/upload-zip")
420459
async def upload_zip(

0 commit comments

Comments
 (0)