@@ -15,16 +15,18 @@ class CheckpointManager:
1515 and file storage for uploaded files.
1616 """
1717
def __init__(self, db_path: str = "data/jobs.db", server_session_id: Optional[str] = None):
    """
    Initialize the checkpoint manager.

    Args:
        db_path: Path to the SQLite database file.
        server_session_id: Unique identifier for the current server
            session; used to tell this session's running jobs apart
            from stale ones left by a previous process.
    """
    self.server_session_id = server_session_id
    self.db = Database(db_path)
    # Make sure the upload area exists before any input file is preserved.
    self.uploads_dir = Path("data/uploads")
    self.uploads_dir.mkdir(parents=True, exist_ok=True)
2830
2931 def start_job (
3032 self ,
@@ -49,8 +51,10 @@ def start_job(
4951 if input_file_path :
5052 self ._preserve_input_file (translation_id , input_file_path , config )
5153
52- # Create job in database with updated config
53- success = self .db .create_job (translation_id , file_type , config )
54+ # Create job in database with updated config and server session ID
55+ success = self .db .create_job (
56+ translation_id , file_type , config , self .server_session_id
57+ )
5458
5559 return success
5660
@@ -263,6 +267,146 @@ def get_resumable_jobs(self) -> List[Dict[str, Any]]:
263267
264268 return jobs
265269
def reset_running_jobs_on_startup(self) -> int:
    """
    Mark 'running' jobs left over from earlier server sessions as 'interrupted'.

    Only jobs whose recorded server_session_id differs from the current
    session's are reset; jobs started by this session are left untouched,
    so a browser refresh cannot interrupt an active translation.

    Call this once on server startup to catch jobs interrupted by a crash
    or restart; the reset jobs then appear in the resumable jobs list.

    Returns:
        Number of jobs reset.
    """
    # Without a session ID we cannot distinguish our own jobs from stale
    # ones, so conservatively reset nothing.
    if self.server_session_id:
        return self.db.reset_running_jobs(self.server_session_id)
    return 0
289+
def cleanup_old_jobs(self, max_age_days: int = 30) -> Tuple[int, int]:
    """
    Clean up old jobs and their associated files.

    Removes jobs older than max_age_days from the database and deletes
    their per-job upload directories to prevent database and disk bloat.

    Args:
        max_age_days: Maximum age in days for jobs to keep (default 30)

    Returns:
        Tuple of (jobs_deleted, files_cleaned)
    """
    # Collect IDs of jobs about to be deleted so their upload directories
    # can be removed after the database cleanup.
    # NOTE(review): this only sees jobs returned by get_resumable_jobs;
    # old jobs outside that set may keep their upload dirs — confirm intent.
    stale_ids = []
    try:
        from datetime import datetime, timedelta

        cutoff = datetime.now() - timedelta(days=max_age_days)
        # max_age_days=9999 is used as "no age limit" to fetch everything.
        for job in self.db.get_resumable_jobs(max_age_days=9999):
            stamp = job.get('created_at', '')
            if not stamp:
                continue
            try:
                created = datetime.fromisoformat(stamp.replace('Z', '+00:00'))
            except (ValueError, TypeError):
                continue
            # Drop tz info so aware timestamps compare against naive now().
            if created.replace(tzinfo=None) < cutoff:
                stale_ids.append(job['translation_id'])
    except Exception as e:
        print(f"Warning: Error getting old job list: {e}")

    # Delete the database rows first; file removal below is best-effort.
    jobs_deleted = self.db.cleanup_old_jobs(max_age_days)

    files_cleaned = 0
    for job_id in stale_ids:
        job_dir = self.uploads_dir / job_id
        if not job_dir.exists():
            continue
        try:
            shutil.rmtree(job_dir)
            files_cleaned += 1
        except Exception as e:
            print(f"Warning: Could not delete upload directory for {job_id}: {e}")

    return jobs_deleted, files_cleaned
338+
def cleanup_orphan_uploads(self) -> int:
    """
    Clean up upload files/directories that don't have corresponding jobs in the database.

    These are "orphan" items left behind from previous incomplete cleanups.
    Handles:
    - trans_xxx folders (job ID folders)
    - hash_filename files (legacy upload files)

    Items whose name starts with 'test_' are always skipped.

    Returns:
        Number of orphan items deleted
    """
    if not self.uploads_dir.exists():
        return 0

    # Collect every known job ID and every preserved input filename so that
    # anything in the uploads directory matching neither is an orphan.
    import json
    import sqlite3

    db_job_ids = set()
    preserved_files = set()  # basenames of files referenced by job configs
    try:
        conn = sqlite3.connect(self.db.db_path)
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            cursor.execute("SELECT translation_id, config FROM translation_jobs")
            for row in cursor.fetchall():
                db_job_ids.add(row['translation_id'])
                # Robustness fix: one malformed config JSON used to abort the
                # whole scan (and return 0); now the bad row is just skipped.
                try:
                    config = json.loads(row['config'])
                except (ValueError, TypeError):
                    continue
                preserved_path = config.get('preserved_input_path', '')
                if preserved_path:
                    # Store the filename to check against orphan files.
                    preserved_files.add(Path(preserved_path).name)
        finally:
            # Leak fix: the connection was never closed when any statement
            # above raised, because the broad except below returned early.
            conn.close()
    except Exception as e:
        print(f"Warning: Error getting job IDs: {e}")
        return 0

    orphans_deleted = 0
    for item in self.uploads_dir.iterdir():
        item_name = item.name

        # Skip test folders
        if item_name.startswith('test_'):
            continue

        if item.is_dir():
            # Job-ID folders survive only while their job row exists.
            is_orphan = not (item_name.startswith('trans_') and item_name in db_job_ids)
        else:
            # Legacy flat files survive only while some job references them.
            is_orphan = item_name not in preserved_files

        if not is_orphan:
            continue
        try:
            if item.is_dir():
                shutil.rmtree(item)
            else:
                item.unlink()
            orphans_deleted += 1
        except Exception as e:
            print(f"Warning: Could not delete orphan {item_name}: {e}")

    return orphans_deleted
409+
266410 def mark_paused (self , translation_id : str ) -> bool :
267411 """
268412 Mark a job as paused (user-initiated stop).
0 commit comments