Skip to content
Merged
62 changes: 53 additions & 9 deletions src/backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,8 @@ def can_use_s3_repository(self) -> tuple[bool, str]:

else:
if return_code != 0:
logger.error(f"Failed to run pgbackrest: {stderr}")
extracted_error = self._extract_error_message(stdout, stderr)
logger.error(f"Failed to run pgbackrest: {extracted_error}")
return False, FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE

for stanza in json.loads(stdout):
Expand Down Expand Up @@ -357,6 +358,41 @@ def _execute_command(
)
return process.returncode, process.stdout.decode(), process.stderr.decode()

@staticmethod
def _extract_error_message(stdout: str, stderr: str) -> str:
"""Extract key error message from pgBackRest output.

Args:
stdout: Standard output from pgBackRest command.
stderr: Standard error from pgBackRest command.

Returns:
Extracted error message, prioritizing ERROR/WARN lines from output.
"""
combined_output = f"{stdout}\n{stderr}".strip()
if not combined_output:
return f"Unknown error occurred. Please check the logs at {PGBACKREST_LOGS_PATH}"

# Extract lines with ERROR or WARN markers from pgBackRest output
error_lines = []
for line in combined_output.splitlines():
if "ERROR:" in line or "WARN:" in line:
# Clean up the line by removing debug prefixes like "P00 ERROR:"
cleaned = re.sub(r"^.*?(ERROR:|WARN:)", r"\1", line).strip()
error_lines.append(cleaned)

# If we found error/warning lines, return them joined
if error_lines:
return "; ".join(error_lines)

# Otherwise return the last non-empty line from stderr or stdout
if stderr.strip():
return stderr.strip().splitlines()[-1]
if stdout.strip():
return stdout.strip().splitlines()[-1]

return f"Unknown error occurred. Please check the logs at {PGBACKREST_LOGS_PATH}"

def _format_backup_list(self, backup_list) -> str:
"""Formats provided list of backups as a table."""
s3_parameters, _ = self._retrieve_s3_parameters()
Expand Down Expand Up @@ -405,7 +441,8 @@ def _generate_backup_list_output(self) -> str:
"--output=json",
])
if return_code != 0:
raise ListBackupsError(f"Failed to list backups with error: {stderr}")
extracted_error = self._extract_error_message(output, stderr)
raise ListBackupsError(f"Failed to list backups with error: {extracted_error}")

backups = json.loads(output)[0]["backup"]
for backup in backups:
Expand Down Expand Up @@ -476,7 +513,8 @@ def _list_backups(self, show_failed: bool, parse=True) -> dict[str, tuple[str, s
"--output=json",
])
if return_code != 0:
raise ListBackupsError(f"Failed to list backups with error: {stderr}")
extracted_error = self._extract_error_message(output, stderr)
raise ListBackupsError(f"Failed to list backups with error: {extracted_error}")

repository_info = next(iter(json.loads(output)), None)

Expand Down Expand Up @@ -511,7 +549,8 @@ def _list_timelines(self) -> dict[str, tuple[str, str]]:
"--output=json",
])
if return_code != 0:
raise ListBackupsError(f"Failed to list repository with error: {stderr}")
extracted_error = self._extract_error_message(output, stderr)
raise ListBackupsError(f"Failed to list repository with error: {extracted_error}")

repository = json.loads(output).items()
if repository is None:
Expand Down Expand Up @@ -733,15 +772,16 @@ def _is_primary_pgbackrest_service_running(self) -> bool:
if not self.charm.primary_endpoint:
logger.warning("Failed to contact pgBackRest TLS server: no primary endpoint")
return False
return_code, _, stderr = self._execute_command([
return_code, stdout, stderr = self._execute_command([
PGBACKREST_EXECUTABLE,
"server-ping",
"--io-timeout=10",
self.charm.primary_endpoint,
])
if return_code != 0:
extracted_error = self._extract_error_message(stdout, stderr)
logger.warning(
f"Failed to contact pgBackRest TLS server on {self.charm.primary_endpoint} with error {stderr}"
f"Failed to contact pgBackRest TLS server on {self.charm.primary_endpoint} with error {extracted_error}"
)
return return_code == 0

Expand Down Expand Up @@ -967,7 +1007,8 @@ def _run_backup(
f"backup/{self.stanza_name}/{backup_id}/backup.log",
s3_parameters,
)
error_message = f"Failed to backup PostgreSQL with error: {stderr}"
extracted_error = self._extract_error_message(stdout, stderr)
error_message = f"Failed to backup PostgreSQL with error: {extracted_error}"
logger.error(f"Backup failed: {error_message}")
event.fail(error_message)
else:
Expand Down Expand Up @@ -1122,7 +1163,7 @@ def _on_restore_action(self, event): # noqa: C901

# Remove previous cluster information to make it possible to initialise a new cluster.
logger.info("Removing previous cluster information")
return_code, _, stderr = self._execute_command(
return_code, stdout, stderr = self._execute_command(
[
"charmed-postgresql.patronictl",
"-c",
Expand All @@ -1134,7 +1175,10 @@ def _on_restore_action(self, event): # noqa: C901
timeout=10,
)
if return_code != 0:
error_message = f"Failed to remove previous cluster information with error: {stderr}"
extracted_error = self._extract_error_message(stdout, stderr)
error_message = (
f"Failed to remove previous cluster information with error: {extracted_error}"
)
logger.error(f"Restore failed: {error_message}")
event.fail(error_message)
return
Expand Down
78 changes: 77 additions & 1 deletion tests/unit/test_backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from ops.testing import Harness
from tenacity import RetryError, wait_fixed

from backups import ListBackupsError
from backups import ListBackupsError, PostgreSQLBackups
from charm import PostgresqlOperatorCharm
from constants import PEER

Expand All @@ -37,6 +37,82 @@ def harness():
harness.cleanup()


def test_extract_error_message_with_error_in_stdout():
"""Test extracting error from stdout with ERROR marker."""
stdout = """2025-11-07 07:21:11.120 P00 ERROR: [056]: unable to find primary cluster - cannot proceed
HINT: are all available clusters in recovery?"""
stderr = ""
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
assert result == "ERROR: [056]: unable to find primary cluster - cannot proceed"


def test_extract_error_message_with_error_in_stderr():
"""Test extracting error from stderr when no ERROR marker."""
stdout = ""
stderr = "Connection refused: cannot connect to S3"
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
assert result == "Connection refused: cannot connect to S3"


def test_extract_error_message_with_both_stdout_and_stderr():
"""Test extracting error when ERROR marker is in stdout and stderr has additional info."""
stdout = "P00 ERROR: database error occurred"
stderr = "Additional stderr info"
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
assert result == "ERROR: database error occurred"


def test_extract_error_message_with_warning():
"""Test extracting warning message from output."""
stdout = "P00 WARN: configuration issue detected"
stderr = ""
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
assert result == "WARN: configuration issue detected"


def test_extract_error_message_with_multiple_errors():
"""Test extracting multiple ERROR/WARN lines."""
stdout = """P00 ERROR: first error
P00 WARN: warning message
P00 ERROR: second error"""
stderr = ""
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
assert result == "ERROR: first error; WARN: warning message; ERROR: second error"


def test_extract_error_message_with_empty_output():
"""Test with empty stdout and stderr returns helpful message."""
result = PostgreSQLBackups._extract_error_message("", "")
assert (
result
== "Unknown error occurred. Please check the logs at /var/snap/charmed-postgresql/common/var/log/pgbackrest"
)


def test_extract_error_message_fallback_to_stdout_last_line():
"""Test fallback to last line of stdout when no ERROR/WARN markers."""
stdout = "Some generic output\nMore details here\nFinal error message"
stderr = ""
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
assert result == "Final error message"


def test_extract_error_message_fallback_to_stderr_last_line():
"""Test fallback to last line of stderr when no ERROR/WARN markers and no stdout."""
stdout = ""
stderr = "Line 1\nLine 2\nFinal error message"
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
assert result == "Final error message"


def test_extract_error_message_cleans_debug_prefix():
"""Test that debug prefixes like 'P00 ERROR:' are cleaned up."""
stdout = "2025-11-07 07:21:11.120 P00 ERROR: test error message"
stderr = ""
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
assert result == "ERROR: test error message"


def test_stanza_name(harness):
assert (
harness.charm.backup.stanza_name
Expand Down
Loading