Skip to content

Commit 877a78c

Browse files
authored
Merge pull request #474 from mrrobot47/feat/backup-locking
feat(backup): add global lock to prevent concurrent backup OOM crashes
2 parents decc314 + de54952 commit 877a78c

File tree

1 file changed

+100
-5
lines changed

1 file changed

+100
-5
lines changed

src/helper/Site_Backup_Restore.php

Lines changed: 100 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,9 @@ class Site_Backup_Restore {
4141
private $dash_error_type = 'unknown';
4242
private $dash_error_code = 0;
4343

44+
// Global backup lock handle for serializing backups
45+
private $global_backup_lock_handle = null;
46+
4447
public function __construct() {
4548
$this->fs = new Filesystem();
4649
}
@@ -97,6 +100,12 @@ public function backup( $args, $assoc_args = [] ) {
97100
register_shutdown_function( [ $this, 'dash_shutdown_handler' ] );
98101
}
99102

103+
// Acquire global lock to serialize backups (prevents OOM from concurrent backups)
104+
$this->acquire_global_backup_lock();
105+
106+
// Register shutdown handler to release lock on any exit (error, crash, etc.)
107+
register_shutdown_function( [ $this, 'release_global_backup_lock' ] );
108+
100109
$this->pre_backup_check();
101110
$backup_dir = EE_BACKUP_DIR . '/' . $this->site_data['site_url'];
102111

@@ -146,6 +155,9 @@ public function backup( $args, $assoc_args = [] ) {
146155
}
147156
}
148157

158+
// Release global backup lock (also released by shutdown handler as safety net)
159+
$this->release_global_backup_lock();
160+
149161
delem_log( 'site backup end' );
150162
}
151163

@@ -992,7 +1004,7 @@ private function pre_restore_check() {
9921004
$this->pre_backup_restore_checks();
9931005

9941006
$remote_path = $this->get_remote_path( false );
995-
$command = sprintf( 'rclone size --json %s', $remote_path );
1007+
$command = sprintf( 'rclone size --json %s', escapeshellarg( $remote_path ) );
9961008
$output = EE::launch( $command );
9971009

9981010
if ( $output->return_code ) {
@@ -1169,7 +1181,7 @@ private function list_remote_backups( $return = false ) {
11691181

11701182
$remote_path = $this->get_rclone_config_path(); // Get remote path without creating a new timestamped folder
11711183

1172-
$command = sprintf( 'rclone lsf --dirs-only %s', $remote_path ); // List only directories
1184+
$command = sprintf( 'rclone lsf --dirs-only %s', escapeshellarg( $remote_path ) ); // List only directories
11731185
$output = EE::launch( $command );
11741186

11751187
if ( $output->return_code !== 0 && ! $return ) {
@@ -1248,7 +1260,7 @@ private function get_remote_path( $upload = true ) {
12481260
private function rclone_download( $path ) {
12491261
$cpu_cores = intval( EE::launch( 'nproc' )->stdout );
12501262
$multi_threads = min( intval( $cpu_cores ) * 2, 32 );
1251-
$command = sprintf( "rclone copy -P --multi-thread-streams %d %s %s", $multi_threads, $this->get_remote_path( false ), $path );
1263+
$command = sprintf( "rclone copy -P --multi-thread-streams %d %s %s", $multi_threads, escapeshellarg( $this->get_remote_path( false ) ), escapeshellarg( $path ) );
12521264
$output = EE::launch( $command );
12531265

12541266
if ( $output->return_code ) {
@@ -1277,7 +1289,7 @@ private function rclone_upload( $path ) {
12771289
$s3_flag = ' --s3-chunk-size=64M --s3-upload-concurrency ' . min( intval( $cpu_cores ) * 2, 32 );
12781290
}
12791291

1280-
$command = sprintf( "rclone copy -P %s --transfers %d --checkers %d --buffer-size %s %s %s", $s3_flag, $transfers, $transfers, $buffer_size, $path, $this->get_remote_path() );
1292+
$command = sprintf( "rclone copy -P %s --transfers %d --checkers %d --buffer-size %s %s %s", $s3_flag, $transfers, $transfers, $buffer_size, escapeshellarg( $path ), escapeshellarg( $this->get_remote_path() ) );
12811293
$output = EE::launch( $command );
12821294

12831295
if ( $output->return_code ) {
@@ -1289,7 +1301,7 @@ private function rclone_upload( $path ) {
12891301
EE::error( 'Error uploading backup to remote storage.' );
12901302
} else {
12911303

1292-
$command = sprintf( 'rclone lsf %s', $this->get_remote_path( false ) );
1304+
$command = sprintf( 'rclone lsf %s', escapeshellarg( $this->get_remote_path( false ) ) );
12931305
$output = EE::launch( $command );
12941306
$remote_path = $output->stdout;
12951307
EE::success( 'Backup uploaded to remote storage. Remote path: ' . $remote_path );
@@ -1603,4 +1615,87 @@ private function sanitize_count( $value ) {
16031615

16041616
return intval( $value );
16051617
}
1618+
1619+
/**
 * Acquire the global backup lock so that only one backup runs at a time.
 *
 * Opens (creating it if needed) EE_BACKUP_DIR/backup-global.lock and takes an
 * exclusive, non-blocking flock() on it. While the lock cannot be obtained,
 * the holder info stored in the lock file is logged and the attempt is
 * retried every 60 seconds, for at most 24 hours. Once the lock is obtained,
 * the lock file is rewritten with this backup's site URL and PID.
 *
 * This prevents multiple concurrent backups from exhausting system resources
 * (RAM, CPU, disk I/O, network bandwidth) when triggered simultaneously.
 *
 * Calling this while this instance already has an open lock handle is a
 * no-op: reopening the file would overwrite the stored handle, which closes
 * the previous stream and therefore drops the lock for a moment.
 *
 * Note: flock() may not work reliably on NFS or other network filesystems.
 * EE_BACKUP_DIR should be on a local filesystem for proper lock behavior.
 *
 * @return void
 */
private function acquire_global_backup_lock() {
	// Re-entrancy guard: keep the handle (and the lock) we already have.
	if ( is_resource( $this->global_backup_lock_handle ) ) {
		return;
	}

	$lock_file = EE_BACKUP_DIR . '/backup-global.lock';
	$max_wait  = 86400; // 24 hours max wait
	$waited    = 0;
	$interval  = 60; // Check every 60 seconds

	// Ensure backup directory exists
	if ( ! $this->fs->exists( EE_BACKUP_DIR ) ) {
		$this->fs->mkdir( EE_BACKUP_DIR );
	}

	// 'c+' opens for read/write and creates the file if missing, without truncating it,
	// so the current holder's info stays readable by waiting processes.
	$this->global_backup_lock_handle = fopen( $lock_file, 'c+' );

	if ( ! $this->global_backup_lock_handle ) {
		$this->global_backup_lock_handle = null;
		$this->capture_error(
			'Cannot create backup lock file',
			self::ERROR_TYPE_FILESYSTEM,
			5002
		);
		EE::error( 'Cannot create backup lock file.' );

		// Defensive: never fall through to flock() without a valid handle,
		// in case EE::error() does not terminate the process.
		return;
	}

	// Try to acquire exclusive lock (non-blocking, so waiting status can be logged)
	while ( ! flock( $this->global_backup_lock_handle, LOCK_EX | LOCK_NB ) ) {
		if ( $waited >= $max_wait ) {
			fclose( $this->global_backup_lock_handle );
			$this->global_backup_lock_handle = null;
			$this->capture_error(
				'Timeout waiting for another backup to complete',
				self::ERROR_TYPE_LOCK,
				5003
			);
			EE::error( 'Timeout waiting for another backup. Try again later.' );

			// Defensive: the handle is gone, so the loop must not run again.
			return;
		}

		// Read who has the lock
		rewind( $this->global_backup_lock_handle );
		$lock_info = stream_get_contents( $this->global_backup_lock_handle );

		// stream_get_contents() returns false on failure; treat that like an empty file.
		$holder = is_string( $lock_info ) ? trim( $lock_info ) : '';

		EE::log( sprintf( 'Another backup in progress (%s). Waiting... (%d/%d sec)',
			'' !== $holder ? $holder : 'unknown', $waited, $max_wait ) );

		sleep( $interval );
		$waited += $interval;
	}

	// Got the lock! Replace the previous holder's info with ours.
	ftruncate( $this->global_backup_lock_handle, 0 );
	rewind( $this->global_backup_lock_handle );
	fwrite( $this->global_backup_lock_handle, $this->site_data['site_url'] . ' (PID: ' . getmypid() . ')' );
	fflush( $this->global_backup_lock_handle );

	EE::debug( 'Acquired global backup lock for: ' . $this->site_data['site_url'] );
}
1686+
1687+
/**
 * Release the global backup lock, if this instance holds a lock handle.
 *
 * Unlocks and closes the stored lock file handle, then clears the stored
 * handle. When no handle is stored the call does nothing, so it can be
 * invoked more than once.
 *
 * @return void
 */
public function release_global_backup_lock() {
	$handle = $this->global_backup_lock_handle;

	// Nothing stored: there is no lock to release.
	if ( ! $handle ) {
		return;
	}

	flock( $handle, LOCK_UN );
	fclose( $handle );

	$this->global_backup_lock_handle = null;
	EE::debug( 'Released global backup lock' );
}
16061701
}

0 commit comments

Comments
 (0)