From 8206f3b5927133fd68186be751f2df3975c8b500 Mon Sep 17 00:00:00 2001
From: Timeraider <57343973+GitTimeraider@users.noreply.github.com>
Date: Tue, 9 Sep 2025 21:28:56 +0200
Subject: [PATCH 1/2] Double run issue fixes

---
 app.py            |  79 ++++++++++++++++++++----
 backup_service.py | 154 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 199 insertions(+), 34 deletions(-)

diff --git a/app.py b/app.py
index a006bd1..48f9534 100644
--- a/app.py
+++ b/app.py
@@ -169,6 +169,24 @@ def format_local_time_filter(utc_dt, format_str='%Y-%m-%d %H:%M'):
 def schedule_all_repositories():
     """Schedule all active repositories on startup"""
     try:
+        # Clean up any stuck 'running' jobs from previous sessions
+        stuck_jobs = BackupJob.query.filter_by(status='running').all()
+        if stuck_jobs:
+            logger.warning(f"Found {len(stuck_jobs)} stuck 'running' jobs from previous session")
+            for stuck_job in stuck_jobs:
+                stuck_job.status = 'failed'
+                stuck_job.error_message = 'Job was running when application restarted'
+                stuck_job.completed_at = datetime.utcnow()
+                logger.info(f"Marked stuck job as failed: {stuck_job.id} for repository {stuck_job.repository_id}")
+            db.session.commit()
+
+        # First, clear any existing jobs to prevent duplicates
+        existing_jobs = scheduler.get_jobs()
+        for job in existing_jobs:
+            if job.id.startswith('backup_'):
+                scheduler.remove_job(job.id)
+                logger.info(f"Removed existing job on startup: {job.id}")
+
         repositories = Repository.query.filter_by(is_active=True).all()
         scheduled_count = 0
         for repository in repositories:
@@ -456,14 +474,23 @@ def edit_repository(repo_id):
 
         db.session.commit()
 
-        # Reschedule the backup job
+        # Reschedule the backup job - more robust approach
+        job_id = f'backup_{repo_id}'
         try:
-            scheduler.remove_job(f'backup_{repo_id}', jobstore=None)
-        except:
-            pass
+            # Remove job if it exists
+            if scheduler.get_job(job_id):
+                scheduler.remove_job(job_id)
+                logger.info(f"Removed existing job during edit: {job_id}")
+        except Exception as e:
+            logger.warning(f"Could not remove job during edit {job_id}: {e}")
+
+        # Wait a moment to ensure job removal is complete
+        import time
+        time.sleep(0.1)
 
-        if repository.is_active:
+        if repository.is_active and repository.schedule_type != 'manual':
             schedule_backup_job(repository)
+            logger.info(f"Rescheduled job for repository: {repository.name}")
 
         flash('Repository updated successfully', 'success')
         return redirect(url_for('repositories'))
@@ -590,11 +617,18 @@ def schedule_backup_job(repository):
 
     job_id = f'backup_{repository.id}'
 
-    # Remove existing job if it exists
+    # Remove existing job if it exists - try multiple ways to ensure it's gone
    try:
-        scheduler.remove_job(job_id)
-    except:
-        pass
+        if scheduler.get_job(job_id):
+            scheduler.remove_job(job_id)
+            logger.info(f"Removed existing scheduled job: {job_id}")
+    except Exception as e:
+        logger.warning(f"Could not remove existing job {job_id}: {e}")
+
+    # Double-check that job is really gone
+    if scheduler.get_job(job_id):
+        logger.error(f"Job {job_id} still exists after removal attempt")
+        return
 
     # Create a wrapper function that includes Flask app context
     def backup_with_context():
@@ -616,9 +650,22 @@ def backup_with_context():
                 logger.warning(f"Backup already running for repository {repo.name}, skipping")
                 return
 
+            # Additional check: ensure no backup started in the last 5 minutes to prevent rapid duplicates
+            recent_backup = BackupJob.query.filter_by(
+                repository_id=repository.id
+            ).filter(
+                BackupJob.started_at > datetime.utcnow() - timedelta(minutes=5)
+            ).first()
+
+            if recent_backup:
+                logger.warning(f"Recent backup found for repository {repo.name} (started at {recent_backup.started_at}), skipping")
+                return
+
+            logger.info(f"Starting scheduled backup for repository: {repo.name}")
             backup_service.backup_repository(repo)
+
         except Exception as e:
-            logger.error(f"Error in scheduled backup for repository {repository.id}: {e}")
+            logger.error(f"Error in scheduled backup for repository {repository.id}: {e}", exc_info=True)
 
     # Create new schedule based on schedule_type
     if repository.schedule_type == 'hourly':
@@ -693,11 +740,19 @@ def backup_with_context():
         id=job_id,
         name=f'Backup {repository.name}',
         replace_existing=True,
-        misfire_grace_time=300,  # 5 minutes grace time
-        coalesce=True  # Combine multiple pending executions
+        misfire_grace_time=60,  # Reduced from 5 minutes to 1 minute
+        coalesce=True,  # Combine multiple pending executions
+        max_instances=1  # Only one instance of this specific job can run
     )
 
     logger.info(f"Scheduled backup job for {repository.name} with trigger: {trigger}")
+
+    # Verify the job was actually added
+    added_job = scheduler.get_job(job_id)
+    if added_job:
+        logger.info(f"Job {job_id} successfully scheduled, next run: {added_job.next_run_time}")
+    else:
+        logger.error(f"Failed to schedule job {job_id}")
 
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=8080, debug=False)
diff --git a/backup_service.py b/backup_service.py
index abde85c..827749d 100644
--- a/backup_service.py
+++ b/backup_service.py
@@ -41,19 +41,32 @@ def backup_repository(self, repository):
             repo_backup_dir = user_backup_dir / repository.name
             repo_backup_dir.mkdir(exist_ok=True)
 
-            # Generate timestamp for this backup
-            timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S')
-            backup_name = f"{repository.name}_{timestamp}"
+            # Generate timestamp for this backup with microseconds for uniqueness
+            timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S_%f')
+            backup_name = f"{repository.name}_{timestamp[:19]}"  # Keep readable format for backup name
 
             # Create unique temporary directory and ensure it's clean
             temp_clone_dir = repo_backup_dir / f"temp_{timestamp}"
 
-            # Remove temp directory if it already exists
-            if temp_clone_dir.exists():
+            # Ensure temp directory doesn't exist and create it
+            retry_count = 0
+            max_retries = 5
+            while temp_clone_dir.exists() and retry_count < max_retries:
                 logger.warning(f"Temp directory already exists, removing: {temp_clone_dir}")
-                shutil.rmtree(temp_clone_dir)
+                try:
+                    shutil.rmtree(temp_clone_dir)
+                    break
+                except (OSError, PermissionError) as e:
+                    retry_count += 1
+                    if retry_count >= max_retries:
+                        raise Exception(f"Unable to clean temp directory after {max_retries} attempts: {e}")
+                    # Add a small delay and try with a new timestamp
+                    import time
+                    time.sleep(0.1)
+                    timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S_%f')
+                    temp_clone_dir = repo_backup_dir / f"temp_{timestamp}"
 
-            temp_clone_dir.mkdir(exist_ok=True)
+            temp_clone_dir.mkdir(parents=True, exist_ok=False)
 
             self._clone_repository(repository, temp_clone_dir)
@@ -84,6 +97,12 @@ def backup_repository(self, repository):
             backup_job.status = 'failed'
             backup_job.error_message = str(e)
             backup_job.completed_at = datetime.utcnow()
+
+            # Ensure we commit the failed status immediately
+            try:
+                db.session.commit()
+            except Exception as commit_error:
+                logger.error(f"Failed to commit backup job failure status: {commit_error}")
 
         finally:
             # Always clean up temporary directory
@@ -93,8 +112,30 @@ def backup_repository(self, repository):
                     shutil.rmtree(temp_clone_dir)
                 except Exception as cleanup_error:
                    logger.error(f"Failed to cleanup temp directory {temp_clone_dir}: {cleanup_error}")
+                    # Try force cleanup
+                    try:
+                        import stat
+                        def handle_remove_readonly(func, path, exc):
+                            if exc[1].errno == 13:  # Permission denied
+                                os.chmod(path, stat.S_IWRITE)
+                                func(path)
+                            else:
+                                raise
+                        shutil.rmtree(temp_clone_dir, onerror=handle_remove_readonly)
+                        logger.info(f"Force cleaned temp directory: {temp_clone_dir}")
+                    except Exception as force_error:
+                        logger.error(f"Could not force clean temp directory: {force_error}")
 
-            db.session.commit()
+            # Final commit to ensure all changes are saved
+            try:
+                db.session.commit()
+            except Exception as final_commit_error:
+                logger.error(f"Failed final commit for backup job: {final_commit_error}")
+                # Try to rollback to prevent session issues
+                try:
+                    db.session.rollback()
+                except Exception:
+                    pass
 
     def _clone_repository(self, repository, clone_dir):
         """Clone a repository to the specified directory"""
@@ -109,28 +150,97 @@ def _clone_repository(self, repository, clone_dir):
         # Clean up any existing temp directories for this repository first
         self._cleanup_temp_directories(clone_dir.parent)
 
+        # Ensure the clone directory is completely clean before starting
+        if clone_dir.exists():
+            logger.warning(f"Clone directory exists before cloning, removing: {clone_dir}")
+            try:
+                shutil.rmtree(clone_dir)
+            except Exception as e:
+                logger.error(f"Failed to remove existing clone directory: {e}")
+                raise Exception(f"Cannot clean clone directory: {e}")
+
+        # Recreate the directory to ensure it's empty
+        clone_dir.mkdir(parents=True, exist_ok=False)
+
         # Clone the repository with error handling
         try:
-            git.Repo.clone_from(clone_url, clone_dir, depth=1)
-            logger.info(f"Repository cloned to: {clone_dir}")
-        except git.GitCommandError as e:
-            if "already exists and is not an empty directory" in str(e):
-                logger.warning(f"Directory exists, cleaning and retrying: {clone_dir}")
-                shutil.rmtree(clone_dir)
-                clone_dir.mkdir(exist_ok=True)
-                git.Repo.clone_from(clone_url, clone_dir, depth=1)
-            else:
-                raise e
+            # Use git command directly for better error handling
+            import subprocess
+            git_cmd = [
+                'git', 'clone',
+                '--depth=1',
+                '--verbose',
+                '--config', 'core.autocrlf=false',  # Prevent line ending issues
+                '--config', 'core.filemode=false',  # Prevent permission issues
+                clone_url,
+                str(clone_dir)
+            ]
+
+            result = subprocess.run(
+                git_cmd,
+                capture_output=True,
+                text=True,
+                timeout=300,  # 5 minute timeout
+                cwd=str(clone_dir.parent)
+            )
+
+            if result.returncode != 0:
+                error_msg = f"Git clone failed with exit code {result.returncode}\n"
+                error_msg += f"stdout: {result.stdout}\n"
+                error_msg += f"stderr: {result.stderr}"
+                logger.error(error_msg)
+                raise Exception(f"Git clone failed: {result.stderr}")
+
+            logger.info(f"Repository cloned successfully to: {clone_dir}")
+
+        except subprocess.TimeoutExpired:
+            logger.error(f"Git clone timed out for repository: {repository.url}")
+            raise Exception("Git clone operation timed out")
+        except Exception as e:
+            logger.error(f"Git clone failed for {repository.url}: {str(e)}")
+            # Clean up on failure
+            if clone_dir.exists():
+                try:
+                    shutil.rmtree(clone_dir)
+                except Exception:
+                    pass
+            raise e
 
     def _cleanup_temp_directories(self, repo_backup_dir):
         """Clean up old temporary directories that might be left behind"""
         try:
+            if not repo_backup_dir.exists():
+                return
+
             temp_dirs = [d for d in repo_backup_dir.iterdir() if d.is_dir() and d.name.startswith('temp_')]
+            current_time = datetime.utcnow().timestamp()
+
             for temp_dir in temp_dirs:
-                # Remove temp directories older than 1 hour
-                if datetime.utcnow().timestamp() - temp_dir.stat().st_mtime > 3600:
-                    logger.info(f"Cleaning up old temp directory: {temp_dir}")
-                    shutil.rmtree(temp_dir)
+                try:
+                    # Remove temp directories older than 10 minutes or any that exist from failed jobs
+                    dir_age = current_time - temp_dir.stat().st_mtime
+                    if dir_age > 600:  # 10 minutes
+                        logger.info(f"Cleaning up old temp directory: {temp_dir}")
+                        shutil.rmtree(temp_dir)
+                    elif not any(temp_dir.iterdir()):  # Empty directory
+                        logger.info(f"Cleaning up empty temp directory: {temp_dir}")
+                        shutil.rmtree(temp_dir)
+                except (OSError, PermissionError) as e:
+                    logger.warning(f"Failed to remove temp directory {temp_dir}: {e}")
+                    # Try to force remove if it's a permission issue
+                    try:
+                        import stat
+                        def handle_remove_readonly(func, path, exc):
+                            if exc[1].errno == 13:  # Permission denied
+                                os.chmod(path, stat.S_IWRITE)
+                                func(path)
+                            else:
+                                raise
+                        shutil.rmtree(temp_dir, onerror=handle_remove_readonly)
+                        logger.info(f"Force removed temp directory: {temp_dir}")
+                    except Exception as force_error:
+                        logger.error(f"Could not force remove temp directory {temp_dir}: {force_error}")
+
         except Exception as e:
             logger.warning(f"Failed to cleanup temp directories: {e}")
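
Aside: the cleanup hunks in [PATCH 1/2] all lean on the same recovery idiom -
retry shutil.rmtree() and, on a permission error, clear the read-only bit from
an onerror callback and retry the failed operation. A minimal standalone
sketch of that idiom (the helper name force_rmtree and the retry parameters
are illustrative, not taken from the patch):

    import errno
    import os
    import shutil
    import stat
    import time

    def force_rmtree(path, retries=5, delay=0.1):
        """Remove a directory tree, clearing read-only bits on EACCES."""
        def _on_error(func, failed_path, exc_info):
            # exc_info is the (type, value, traceback) tuple rmtree passes in
            if getattr(exc_info[1], 'errno', None) == errno.EACCES:
                os.chmod(failed_path, stat.S_IWRITE)  # make writable
                func(failed_path)                     # retry the failed op
            else:
                raise
        for attempt in range(retries):
            try:
                shutil.rmtree(path, onerror=_on_error)
                return
            except OSError:
                if attempt == retries - 1:
                    raise
                time.sleep(delay)  # brief backoff before the next attempt
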
From 73eec35604faac7e05cda41f344233b0d3e3ef08 Mon Sep 17 00:00:00 2001
From: Timeraider <57343973+GitTimeraider@users.noreply.github.com>
Date: Tue, 9 Sep 2025 21:33:03 +0200
Subject: [PATCH 2/2] More fixes

---
 app.py            | 37 ++++++++++++++++++++++++++++---------
 backup_service.py | 34 ++++++++++++++++++++++++++++++++--
 2 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/app.py b/app.py
index 48f9534..51cb043 100644
--- a/app.py
+++ b/app.py
@@ -198,15 +198,24 @@ def schedule_all_repositories():
     except Exception as e:
         logger.error(f"Error scheduling repositories on startup: {e}")
 
-# Flag to ensure we only initialize once
+# Thread-safe flag to ensure we only initialize once
+import threading
+_scheduler_lock = threading.Lock()
 _scheduler_initialized = False
 
 def ensure_scheduler_initialized():
-    """Ensure scheduler is initialized with existing repositories"""
+    """Ensure scheduler is initialized with existing repositories (thread-safe)"""
     global _scheduler_initialized
-    if not _scheduler_initialized:
-        schedule_all_repositories()
-        _scheduler_initialized = True
+    if _scheduler_initialized:
+        return
+
+    with _scheduler_lock:
+        # Double-check pattern to avoid race conditions
+        if not _scheduler_initialized:
+            logger.info("Initializing scheduler with existing repositories...")
+            schedule_all_repositories()
+            _scheduler_initialized = True
+            logger.info("Scheduler initialization completed")
 
 @login_manager.user_loader
 def load_user(user_id):
@@ -215,7 +224,6 @@ def load_user(user_id):
 @app.route('/')
 @login_required
 def dashboard():
-    ensure_scheduler_initialized()
     repositories = Repository.query.filter_by(user_id=current_user.id).all()
     recent_jobs = BackupJob.query.filter_by(user_id=current_user.id).order_by(BackupJob.created_at.desc()).limit(10).all()
     return render_template('dashboard.html', repositories=repositories, recent_jobs=recent_jobs)
@@ -647,14 +655,15 @@ def backup_with_context():
             ).first()
 
             if running_job:
-                logger.warning(f"Backup already running for repository {repo.name}, skipping")
+                logger.warning(f"Backup already running for repository {repo.name} (job {running_job.id}), skipping")
                 return
 
-            # Additional check: ensure no backup started in the last 5 minutes to prevent rapid duplicates
+            # Additional check: ensure no backup started in the last 30 seconds to prevent rapid duplicates
+            recent_cutoff = datetime.utcnow() - timedelta(seconds=30)
             recent_backup = BackupJob.query.filter_by(
                 repository_id=repository.id
             ).filter(
-                BackupJob.started_at > datetime.utcnow() - timedelta(minutes=5)
+                BackupJob.started_at > recent_cutoff
             ).first()
 
             if recent_backup:
@@ -754,5 +763,15 @@ def backup_with_context():
     else:
         logger.error(f"Failed to schedule job {job_id}")
 
+# Initialize scheduler with existing repositories at startup
+# This runs after all functions are defined
+try:
+    with app.app_context():
+        logger.info("Starting scheduler initialization at app startup...")
+        ensure_scheduler_initialized()
+        logger.info("Scheduler initialization at startup completed")
+except Exception as e:
+    logger.error(f"Failed to initialize scheduler at startup: {e}")
+
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=8080, debug=False)
diff --git a/backup_service.py b/backup_service.py
index 827749d..fa2f581 100644
--- a/backup_service.py
+++ b/backup_service.py
@@ -4,7 +4,7 @@
 import zipfile
 import tarfile
 import logging
-from datetime import datetime
+from datetime import datetime, timedelta
 from pathlib import Path
 from github import Github
 from models import db, BackupJob
@@ -21,6 +21,28 @@ def backup_repository(self, repository):
         """Backup a repository according to its settings"""
         logger.info(f"Starting backup for repository: {repository.name}")
 
+        # Check if there's already a running backup for this repository
+        existing_running_job = BackupJob.query.filter_by(
+            repository_id=repository.id,
+            status='running'
+        ).first()
+
+        if existing_running_job:
+            logger.warning(f"Backup already running for repository {repository.name} (job {existing_running_job.id}), skipping")
+            return
+
+        # Also check for very recent backups (within last 30 seconds) to prevent rapid duplicates
+        recent_cutoff = datetime.utcnow() - timedelta(seconds=30)
+        recent_job = BackupJob.query.filter_by(
+            repository_id=repository.id
+        ).filter(
+            BackupJob.started_at > recent_cutoff
+        ).first()
+
+        if recent_job:
+            logger.warning(f"Very recent backup found for repository {repository.name} (started at {recent_job.started_at}), skipping to prevent duplicates")
+            return
+
         # Create backup job record
         backup_job = BackupJob(
             user_id=repository.user_id,
@@ -29,7 +51,15 @@ def backup_repository(self, repository):
             started_at=datetime.utcnow()
         )
         db.session.add(backup_job)
-        db.session.commit()
+
+        # Commit immediately to make this job visible to other processes/threads
+        try:
+            db.session.commit()
+            logger.info(f"Created backup job {backup_job.id} for repository {repository.name}")
+        except Exception as e:
+            logger.error(f"Failed to commit backup job creation: {e}")
+            db.session.rollback()
+            return
 
         temp_clone_dir = None
         try:
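
Aside: the initialization change in [PATCH 2/2] is an instance of classic
double-checked locking - an unlocked fast-path read of the flag, then a
re-check under the lock before doing the one-time work. A minimal standalone
sketch (run_once and init_fn are illustrative names, not taken from the
patch):

    import threading

    _init_lock = threading.Lock()
    _initialized = False

    def run_once(init_fn):
        """Run init_fn at most once, even when called from many threads."""
        global _initialized
        if _initialized:            # fast path: no locking once initialized
            return
        with _init_lock:
            if not _initialized:    # re-check while holding the lock
                init_fn()
                _initialized = True

The unlocked read keeps repeated calls cheap; the locked re-check closes the
window where two threads both observe the flag unset and would otherwise run
the initialization twice.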