Skip to content

Commit 90603d9

Browse files
authored
Merge pull request #240 from itsDNNS/fix/backup-restart-duplicates
fix: prevent duplicate backups on container restart
2 parents 387db5f + d475977 commit 90603d9

File tree

2 files changed

+115
-0
lines changed

2 files changed

+115
-0
lines changed

app/modules/backup/collector.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Scheduled backup collector."""
22

33
import logging
4+
import time
45

56
from app.collectors.base import Collector, CollectorResult
67

@@ -16,6 +17,30 @@ def __init__(self, config_mgr, poll_interval=86400, **kwargs):
1617
self._config_mgr = config_mgr
1718
interval_hours = self._get_interval_hours(config_mgr, poll_interval // 3600)
1819
super().__init__(interval_hours * 3600)
20+
self._seed_last_poll()
21+
22+
def _seed_last_poll(self):
    """Set _last_poll from newest backup on disk to survive container restarts.

    Seeds from the newest file regardless of source (scheduled or manual).
    This means a manual backup can shift the automatic schedule after a
    restart, which is acceptable: the guarantee is "at least one backup
    every <interval>", not "backups at a fixed time of day".

    If there are no backups, or the newest entry has no usable
    ``"modified"`` ISO timestamp, ``_last_poll`` is left untouched. A
    timestamp that lies in the future is clamped to the current time so the
    next backup is never delayed by more than one full interval.
    """
    from datetime import datetime
    from .backup import list_backups

    backup_path = self._config_mgr.get("backup_path", "/backup")
    backups = list_backups(backup_path)
    if not backups:
        return

    # NOTE(review): assumes list_backups() returns entries newest-first, so
    # index 0 is the most recent backup -- confirm against its implementation.
    newest = backups[0]
    try:
        seeded = datetime.fromisoformat(newest["modified"]).timestamp()
    except (KeyError, ValueError, TypeError, OverflowError, OSError) as exc:
        # Best-effort seeding: a malformed entry must not break collector
        # construction, but it should be visible in the logs.
        log.warning("Could not seed backup schedule from disk: %s", exc)
        return

    now = time.time()
    # Clock skew (or a naive timestamp from another zone) can place the file
    # in the future; never let that push the next backup past one interval.
    self._last_poll = min(seeded, now)
    age_hours = (now - self._last_poll) / 3600
    log.info("Backup schedule seeded from disk — newest backup is %.1fh old", age_hours)
1944

2045
@staticmethod
2146
def _get_interval_hours(config_mgr, default_hours=24):

tests/test_collectors.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,96 @@ def test_name(self):
627627
assert c.name == "speedtest"
628628

629629

630+
# ── BackupCollector Tests ──
631+
632+
633+
class TestBackupCollector:
    """Tests for BackupCollector: config wiring, restart seeding and collect()."""

    @staticmethod
    def _backup_entry(filename, **age):
        """Return a backup listing entry whose "modified" stamp lies ``age`` in the past.

        ``age`` is forwarded as keyword arguments to ``timedelta``.
        """
        from datetime import datetime, timedelta

        stamp = datetime.now() - timedelta(**age)
        return {"filename": filename, "size": 100, "modified": stamp.isoformat()}

    def _make_collector(self, configured=True, interval_hours=24, backups=None):
        """Build a BackupCollector around a mocked config manager.

        ``list_backups`` is patched for the duration of construction so the
        collector seeds itself from ``backups`` (an empty list when omitted).
        Returns the ``(collector, config_mgr)`` pair.
        """
        settings = {
            "backup_path": "/backup",
            "backup_interval_hours": interval_hours,
            "backup_retention": 5,
        }

        def fake_get(key, *fallback):
            # Mirror config_mgr.get(key[, default]) for the keys above.
            return settings.get(key, fallback[0] if fallback else None)

        config_mgr = MagicMock()
        config_mgr.is_backup_configured.return_value = configured
        config_mgr.data_dir = "/data"
        config_mgr.get.side_effect = fake_get

        from app.modules.backup.collector import BackupCollector
        on_disk = backups or []
        with patch("app.modules.backup.backup.list_backups", return_value=on_disk):
            collector = BackupCollector(config_mgr=config_mgr)
        return collector, config_mgr

    def test_name(self):
        collector, _ = self._make_collector()
        assert collector.name == "backup"

    def test_is_enabled(self):
        enabled, _ = self._make_collector(configured=True)
        assert enabled.is_enabled() is True
        disabled, _ = self._make_collector(configured=False)
        assert disabled.is_enabled() is False

    def test_interval_from_config(self):
        collector, _ = self._make_collector(interval_hours=168)
        assert collector._poll_interval_seconds == 168 * 3600

    def test_seed_last_poll_from_disk(self):
        """_last_poll is seeded from newest backup file on init."""
        entry = self._backup_entry("docsight_backup_test.tar.gz", hours=2)
        collector, _ = self._make_collector(backups=[entry])
        # Seeded value must be roughly two hours back, not the 0 default.
        assert collector._last_poll > 0
        elapsed = time.time() - collector._last_poll
        assert 7000 < elapsed < 7400  # ~2h in seconds

    def test_seed_no_backups_leaves_last_poll_zero(self):
        """No backups on disk → _last_poll stays 0, first backup runs immediately."""
        collector, _ = self._make_collector(backups=[])
        assert collector._last_poll == 0.0

    def test_should_poll_false_after_seed(self):
        """Container restart with recent backup → should_poll() returns False."""
        entry = self._backup_entry("docsight_backup_test.tar.gz", hours=1)
        collector, _ = self._make_collector(interval_hours=24, backups=[entry])
        assert collector.should_poll() is False

    def test_should_poll_true_when_backup_expired(self):
        """Backup older than interval → should_poll() returns True."""
        entry = self._backup_entry("docsight_backup_old.tar.gz", days=2)
        collector, _ = self._make_collector(interval_hours=24, backups=[entry])
        assert collector.should_poll() is True

    def test_seed_includes_manual_backups(self):
        """Seed uses newest backup regardless of source (scheduled or manual).

        After a restart, _last_poll anchors to the newest file on disk.
        This means a manual backup can shift the automatic schedule, which
        is by design: the guarantee is "at least one backup every <interval>".
        """
        entry = self._backup_entry("docsight_backup_2026-03-15_120000.tar.gz", hours=1)
        collector, _ = self._make_collector(interval_hours=24, backups=[entry])
        # The file's timestamp anchors the schedule, so polling has to wait.
        assert collector.should_poll() is False

    @patch("app.modules.backup.backup.create_backup_to_file")
    @patch("app.modules.backup.backup.cleanup_old_backups")
    def test_collect_creates_backup(self, mock_cleanup, mock_create):
        mock_create.return_value = "docsight_backup_2026-03-15.tar.gz"
        collector, _ = self._make_collector()
        outcome = collector.collect()
        assert outcome.success is True
        mock_create.assert_called_once()
        mock_cleanup.assert_called_once()
718+
719+
630720
# ── BQMCollector Tests ──
631721

632722

0 commit comments

Comments
 (0)