Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion src/backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
BACKUP_TYPE_OVERRIDES,
BACKUP_USER,
PATRONI_CONF_PATH,
PGBACKREST_ARCHIVE_TIMEOUT_ERROR_CODE,
PGBACKREST_BACKUP_ID_FORMAT,
PGBACKREST_CONF_PATH,
PGBACKREST_CONFIGURATION_FILE,
Expand Down Expand Up @@ -717,15 +718,27 @@ def check_stanza(self) -> bool:
# for that or else the s3 initialization sequence will fail.
for attempt in Retrying(stop=stop_after_attempt(6), wait=wait_fixed(10), reraise=True):
with attempt:
return_code, _, stderr = self._execute_command([
return_code, stdout, stderr = self._execute_command([
PGBACKREST_EXECUTABLE,
PGBACKREST_CONFIGURATION_FILE,
f"--stanza={self.stanza_name}",
"check",
])
if return_code == PGBACKREST_ARCHIVE_TIMEOUT_ERROR_CODE:
# Raise an error if the archive command times out, so the user has the chance
# to fix network issues and run `juju resolve` to re-trigger the hook that calls
# this method.
extracted_error = self._extract_error_message(stdout, stderr)
logger.error(
f"error: {extracted_error} - please fix the error and call juju resolve on this unit"
)
raise TimeoutError
if return_code != 0:
raise Exception(stderr)
self.charm._set_primary_status_message()
except TimeoutError as e:
# Re-raise so the charm goes into error state (not blocked), which allows `juju resolve`.
raise e
except Exception:
# If the check command doesn't succeed, remove the stanza name
# and rollback the configuration.
Expand Down
4 changes: 4 additions & 0 deletions src/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@

# Snap constants.
PGBACKREST_EXECUTABLE = "charmed-postgresql.pgbackrest"
# pgBackRest error codes, compared against the return code of a pgBackRest command.
PGBACKREST_ARCHIVE_TIMEOUT_ERROR_CODE = (
82  # Archive timeout: WAL files could not be archived within the configured timeout period.
)

SNAP_COMMON_PATH = "/var/snap/charmed-postgresql/common"
SNAP_CURRENT_PATH = "/var/snap/charmed-postgresql/current"
Expand Down
8 changes: 8 additions & 0 deletions tests/unit/test_backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,6 +860,14 @@ def test_check_stanza(harness):
FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE
)

# Test when the failure in the stanza check is due to an archive timeout.
_execute_command.reset_mock()
_s3_initialization_set_failure.reset_mock()
_execute_command.return_value = (82, "", "fake stderr")
with pytest.raises(TimeoutError):
harness.charm.backup.check_stanza()
_s3_initialization_set_failure.assert_not_called()

_execute_command.reset_mock()
_s3_initialization_set_failure.reset_mock()
_execute_command.return_value = (0, "fake stdout", "")
Expand Down