Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 81 additions & 7 deletions src/backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
BACKUP_TYPE_OVERRIDES,
BACKUP_USER,
PGBACKREST_LOGROTATE_FILE,
PGBACKREST_LOGS_PATH,
WORKLOAD_OS_GROUP,
WORKLOAD_OS_USER,
)
Expand All @@ -55,6 +56,11 @@
]


def is_s3_block_message(message: str) -> bool:
    """Return True when a status message begins with one of the S3 block messages.

    A prefix match is used rather than equality, so a block message that is
    followed by extra text (such as an appended error hint) is still recognised.
    """
    known_prefixes = tuple(S3_BLOCK_MESSAGES)
    return message.startswith(known_prefixes)


class PostgreSQLBackups(Object):
"""In this class, we manage PostgreSQL backups."""

Expand Down Expand Up @@ -193,8 +199,21 @@ def can_use_s3_repository(self) -> tuple[bool, str | None]:

for stanza in json.loads(output):
if (stanza_name := stanza.get("name")) and stanza_name == "[invalid]":
logger.error("Invalid stanza name from s3")
return False, FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE
repo_message = next(
(
repo["status"]["message"]
for repo in stanza.get("repo", [])
if repo.get("status", {}).get("message")
),
"",
)[:120]
logger.error("Invalid stanza name from s3: %s", repo_message)
error_message = (
f"{FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE}: {repo_message}"
if repo_message
else FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE
)
return False, error_message
if stanza_name != self.stanza_name:
logger.debug(
f"can_use_s3_repository: incompatible stanza name s3={stanza_name or ''}, local={self.stanza_name}"
Expand Down Expand Up @@ -546,6 +565,45 @@ def _parse_backup_id(self, label) -> tuple[str, str]:
backup_type,
)

@staticmethod
def _extract_error_message(
    stdout: str | None, stderr: str | None, *, for_status: bool = False
) -> str | None:
    """Extract a short, key error message from pgBackRest output.

    Lines containing ``ERROR:`` or ``WARN:`` are preferred: each one is cut down to
    start at that marker and they are joined with ``"; "`` (stdout lines first, then
    stderr lines, in their original order). When no such line exists, the last line
    of stderr is used, or the last line of stdout if stderr is blank.

    Args:
        stdout: Standard output from pgBackRest command.
        stderr: Standard error from pgBackRest command.
        for_status: If True, return None instead of a generic fallback message
            when there is no output to extract a message from.

    Returns:
        The extracted message, truncated to 120 characters. When both streams are
        empty or blank: None if ``for_status`` is True, otherwise a generic message
        pointing at the pgBackRest logs path (returned untruncated).
    """
    combined_output = f"{stdout or ''}\n{stderr or ''}".strip()
    if not combined_output:
        if for_status:
            return None
        return f"Unknown error occurred. Please check the logs at {PGBACKREST_LOGS_PATH}"

    # Drop whatever precedes the first ERROR:/WARN: marker on each matching line
    # (e.g. timestamps or process identifiers) so only the message itself remains.
    error_lines = [
        re.sub(r"^.*?(ERROR:|WARN:)", r"\1", line).strip()
        for line in combined_output.splitlines()
        if "ERROR:" in line or "WARN:" in line
    ]

    if error_lines:
        result = "; ".join(error_lines)
    else:
        # combined_output is non-blank here, so at least one stream has content and
        # no further "nothing to report" fallback is needed; stderr takes precedence.
        stderr_text = stderr.strip() if stderr else ""
        source_text = stderr_text or (stdout or "").strip()
        result = source_text.splitlines()[-1]

    # Keep the message short enough to be appended to a unit status message.
    return result[:120]

def _initialise_stanza(self, event: HookEvent) -> bool:
"""Initialize the stanza.

Expand All @@ -555,7 +613,7 @@ def _initialise_stanza(self, event: HookEvent) -> bool:
"""
# Enable stanza initialisation if the backup settings were fixed after being invalid
# or pointing to a repository where there are backups from another cluster.
if self.charm.is_blocked and self.charm.unit.status.message not in S3_BLOCK_MESSAGES:
if self.charm.is_blocked and not is_s3_block_message(self.charm.unit.status.message):
logger.warning("couldn't initialize stanza due to a blocked status, deferring event")
event.defer()
return False
Expand All @@ -575,9 +633,15 @@ def _initialise_stanza(self, event: HookEvent) -> bool:
f"--stanza={self.stanza_name}",
"stanza-create",
])
except ExecError:
logger.exception("Failed to initialise stanza:")
self._s3_initialization_set_failure(FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE)
except ExecError as e:
logger.error("Failed to initialise stanza: stdout=%s, stderr=%s", e.stdout, e.stderr)
error_hint = self._extract_error_message(e.stdout, e.stderr, for_status=True)
block_message = (
f"{FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE}: {error_hint}"
if error_hint
else FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE
)
self._s3_initialization_set_failure(block_message)
return False

self.start_stop_pgbackrest_service()
Expand Down Expand Up @@ -615,6 +679,16 @@ def check_stanza(self) -> bool:
with attempt:
self._execute_command(["pgbackrest", f"--stanza={self.stanza_name}", "check"])
self.charm._set_active_status()
except ExecError as e:
logger.error("Failed to check stanza: stdout=%s, stderr=%s", e.stdout, e.stderr)
error_hint = self._extract_error_message(e.stdout, e.stderr, for_status=True)
block_message = (
f"{FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE}: {error_hint}"
if error_hint
else FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE
)
self._s3_initialization_set_failure(block_message)
return False
except Exception:
logger.exception("Failed to check stanza:")
self._s3_initialization_set_failure(FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE)
Expand Down Expand Up @@ -916,7 +990,7 @@ def _on_s3_credential_gone(self, _) -> None:
"s3-initialization-done": "",
"s3-initialization-block-message": "",
})
if self.charm.is_blocked and self.charm.unit.status.message in S3_BLOCK_MESSAGES:
if self.charm.is_blocked and is_s3_block_message(self.charm.unit.status.message):
self.charm._set_active_status()

def _on_list_backups_action(self, event) -> None:
Expand Down
4 changes: 2 additions & 2 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
from requests import ConnectionError as RequestsConnectionError
from tenacity import RetryError, Retrying, stop_after_attempt, stop_after_delay, wait_fixed

from backups import CANNOT_RESTORE_PITR, S3_BLOCK_MESSAGES, PostgreSQLBackups
from backups import CANNOT_RESTORE_PITR, PostgreSQLBackups, is_s3_block_message
from config import CharmConfig
from constants import (
APP_SCOPE,
Expand Down Expand Up @@ -1504,7 +1504,7 @@ def _on_update_status_early_exit_checks(self, container) -> bool:
self._check_pgdata_storage_size()

if (
self._has_blocked_status and self.unit.status not in S3_BLOCK_MESSAGES
self._has_blocked_status and not is_s3_block_message(self.unit.status.message)
) or self._has_non_restore_waiting_status:
# If charm was failing to disable plugin, try again and continue (user may have removed the objects)
if self.unit.status.message == EXTENSION_OBJECT_MESSAGE:
Expand Down
1 change: 1 addition & 0 deletions src/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,4 @@
)

# Path of the pgBackRest logrotate file (presumably the logrotate configuration — confirm).
PGBACKREST_LOGROTATE_FILE = "/etc/logrotate.d/pgbackrest.logrotate"
# Location of the pgBackRest logs; interpolated into the generic
# "Please check the logs at ..." fallback error message.
PGBACKREST_LOGS_PATH = "/var/log/pgbackrest"
5 changes: 4 additions & 1 deletion tests/integration/test_backups_gcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,5 +264,8 @@ async def test_block_on_missing_region(
logger.info("waiting for the database charm to become blocked")
unit = ops_test.model.units.get(f"{database_app_name}/0")
await ops_test.model.block_until(
lambda: unit.workload_status_message == FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE
lambda: (
unit.workload_status_message
and unit.workload_status_message.startswith(FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE)
)
)
4 changes: 2 additions & 2 deletions tests/unit/test_backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,7 @@ def test_initialise_stanza(harness):
# Only the leader will display the blocked status.
assert isinstance(harness.charm.unit.status, MaintenanceStatus)
_s3_initialization_set_failure.assert_called_once_with(
FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE
f"{FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE}: fake error"
)

# Test when the archiving is working correctly (pgBackRest check command succeeds)
Expand Down Expand Up @@ -809,7 +809,7 @@ def test_check_stanza(harness):
_reload_patroni_configuration.assert_not_called()
_set_active_status.assert_not_called()
_s3_initialization_set_failure.assert_called_once_with(
FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE
f"{FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE}: fake error"
)

_execute_command.reset_mock()
Expand Down
Loading