Skip to content

Commit 9813b49

Browse files
committed
Extract warning and error messages from pgBackRest logs
Sometimes the error message is sent to the stdout instead of the stderr, so it's important to check both Signed-off-by: Marcelo Henrique Neppel <marcelo.neppel@canonical.com>
1 parent 28b6219 commit 9813b49

File tree

2 files changed

+130
-10
lines changed

2 files changed

+130
-10
lines changed

src/backups.py

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,8 @@ def can_use_s3_repository(self) -> tuple[bool, str]:
195195

196196
else:
197197
if return_code != 0:
198-
logger.error(f"Failed to run pgbackrest: {stderr}")
198+
extracted_error = self._extract_error_message(stdout, stderr)
199+
logger.error(f"Failed to run pgbackrest: {extracted_error}")
199200
return False, FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE
200201

201202
for stanza in json.loads(stdout):
@@ -357,6 +358,41 @@ def _execute_command(
357358
)
358359
return process.returncode, process.stdout.decode(), process.stderr.decode()
359360

361+
@staticmethod
362+
def _extract_error_message(stdout: str, stderr: str) -> str:
363+
"""Extract key error message from pgBackRest output.
364+
365+
Args:
366+
stdout: Standard output from pgBackRest command.
367+
stderr: Standard error from pgBackRest command.
368+
369+
Returns:
370+
Extracted error message, prioritizing ERROR/WARN lines from output.
371+
"""
372+
combined_output = f"{stdout}\n{stderr}".strip()
373+
if not combined_output:
374+
return f"Unknown error occurred. Please check the logs at {PGBACKREST_LOGS_PATH}"
375+
376+
# Extract lines with ERROR or WARN markers from pgBackRest output
377+
error_lines = []
378+
for line in combined_output.splitlines():
379+
if "ERROR:" in line or "WARN:" in line:
380+
# Clean up the line by removing debug prefixes like "P00 ERROR:"
381+
cleaned = re.sub(r"^.*?(ERROR:|WARN:)", r"\1", line).strip()
382+
error_lines.append(cleaned)
383+
384+
# If we found error/warning lines, return them joined
385+
if error_lines:
386+
return " ".join(error_lines)
387+
388+
# Otherwise return the last non-empty line from stderr or stdout
389+
if stderr.strip():
390+
return stderr.strip().splitlines()[-1]
391+
if stdout.strip():
392+
return stdout.strip().splitlines()[-1]
393+
394+
return f"Unknown error occurred. Please check the logs at {PGBACKREST_LOGS_PATH}"
395+
360396
def _format_backup_list(self, backup_list) -> str:
361397
"""Formats provided list of backups as a table."""
362398
s3_parameters, _ = self._retrieve_s3_parameters()
@@ -405,7 +441,8 @@ def _generate_backup_list_output(self) -> str:
405441
"--output=json",
406442
])
407443
if return_code != 0:
408-
raise ListBackupsError(f"Failed to list backups with error: {stderr}")
444+
extracted_error = self._extract_error_message(output, stderr)
445+
raise ListBackupsError(f"Failed to list backups with error: {extracted_error}")
409446

410447
backups = json.loads(output)[0]["backup"]
411448
for backup in backups:
@@ -476,7 +513,8 @@ def _list_backups(self, show_failed: bool, parse=True) -> dict[str, tuple[str, s
476513
"--output=json",
477514
])
478515
if return_code != 0:
479-
raise ListBackupsError(f"Failed to list backups with error: {stderr}")
516+
extracted_error = self._extract_error_message(output, stderr)
517+
raise ListBackupsError(f"Failed to list backups with error: {extracted_error}")
480518

481519
repository_info = next(iter(json.loads(output)), None)
482520

@@ -511,7 +549,8 @@ def _list_timelines(self) -> dict[str, tuple[str, str]]:
511549
"--output=json",
512550
])
513551
if return_code != 0:
514-
raise ListBackupsError(f"Failed to list repository with error: {stderr}")
552+
extracted_error = self._extract_error_message(output, stderr)
553+
raise ListBackupsError(f"Failed to list repository with error: {extracted_error}")
515554

516555
repository = json.loads(output).items()
517556
if repository is None:
@@ -733,15 +772,16 @@ def _is_primary_pgbackrest_service_running(self) -> bool:
733772
if not self.charm.primary_endpoint:
734773
logger.warning("Failed to contact pgBackRest TLS server: no primary endpoint")
735774
return False
736-
return_code, _, stderr = self._execute_command([
775+
return_code, stdout, stderr = self._execute_command([
737776
PGBACKREST_EXECUTABLE,
738777
"server-ping",
739778
"--io-timeout=10",
740779
self.charm.primary_endpoint,
741780
])
742781
if return_code != 0:
782+
extracted_error = self._extract_error_message(stdout, stderr)
743783
logger.warning(
744-
f"Failed to contact pgBackRest TLS server on {self.charm.primary_endpoint} with error {stderr}"
784+
f"Failed to contact pgBackRest TLS server on {self.charm.primary_endpoint} with error {extracted_error}"
745785
)
746786
return return_code == 0
747787

@@ -967,7 +1007,8 @@ def _run_backup(
9671007
f"backup/{self.stanza_name}/{backup_id}/backup.log",
9681008
s3_parameters,
9691009
)
970-
error_message = f"Failed to backup PostgreSQL with error: {stderr}"
1010+
extracted_error = self._extract_error_message(stdout, stderr)
1011+
error_message = f"Failed to backup PostgreSQL with error: {extracted_error}"
9711012
logger.error(f"Backup failed: {error_message}")
9721013
event.fail(error_message)
9731014
else:
@@ -1122,7 +1163,7 @@ def _on_restore_action(self, event): # noqa: C901
11221163

11231164
# Remove previous cluster information to make it possible to initialise a new cluster.
11241165
logger.info("Removing previous cluster information")
1125-
return_code, _, stderr = self._execute_command(
1166+
return_code, stdout, stderr = self._execute_command(
11261167
[
11271168
"charmed-postgresql.patronictl",
11281169
"-c",
@@ -1134,7 +1175,10 @@ def _on_restore_action(self, event): # noqa: C901
11341175
timeout=10,
11351176
)
11361177
if return_code != 0:
1137-
error_message = f"Failed to remove previous cluster information with error: {stderr}"
1178+
extracted_error = self._extract_error_message(stdout, stderr)
1179+
error_message = (
1180+
f"Failed to remove previous cluster information with error: {extracted_error}"
1181+
)
11381182
logger.error(f"Restore failed: {error_message}")
11391183
event.fail(error_message)
11401184
return

tests/unit/test_backups.py

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from ops.testing import Harness
1414
from tenacity import RetryError, wait_fixed
1515

16-
from backups import ListBackupsError
16+
from backups import ListBackupsError, PostgreSQLBackups
1717
from charm import PostgresqlOperatorCharm
1818
from constants import PEER
1919

@@ -37,6 +37,82 @@ def harness():
3737
harness.cleanup()
3838

3939

40+
def test_extract_error_message_with_error_in_stdout():
41+
"""Test extracting error from stdout with ERROR marker."""
42+
stdout = """2025-11-07 07:21:11.120 P00 ERROR: [056]: unable to find primary cluster - cannot proceed
43+
HINT: are all available clusters in recovery?"""
44+
stderr = ""
45+
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
46+
assert result == "ERROR: [056]: unable to find primary cluster - cannot proceed"
47+
48+
49+
def test_extract_error_message_with_error_in_stderr():
50+
"""Test extracting error from stderr when no ERROR marker."""
51+
stdout = ""
52+
stderr = "Connection refused: cannot connect to S3"
53+
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
54+
assert result == "Connection refused: cannot connect to S3"
55+
56+
57+
def test_extract_error_message_with_both_stdout_and_stderr():
58+
"""Test extracting error when ERROR marker is in stdout and stderr has additional info."""
59+
stdout = "P00 ERROR: database error occurred"
60+
stderr = "Additional stderr info"
61+
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
62+
assert result == "ERROR: database error occurred"
63+
64+
65+
def test_extract_error_message_with_warning():
66+
"""Test extracting warning message from output."""
67+
stdout = "P00 WARN: configuration issue detected"
68+
stderr = ""
69+
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
70+
assert result == "WARN: configuration issue detected"
71+
72+
73+
def test_extract_error_message_with_multiple_errors():
74+
"""Test extracting multiple ERROR/WARN lines."""
75+
stdout = """P00 ERROR: first error
76+
P00 WARN: warning message
77+
P00 ERROR: second error"""
78+
stderr = ""
79+
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
80+
assert result == "ERROR: first error WARN: warning message ERROR: second error"
81+
82+
83+
def test_extract_error_message_with_empty_output():
84+
"""Test with empty stdout and stderr returns helpful message."""
85+
result = PostgreSQLBackups._extract_error_message("", "")
86+
assert (
87+
result
88+
== "Unknown error occurred. Please check the logs at /var/snap/charmed-postgresql/common/var/log/pgbackrest"
89+
)
90+
91+
92+
def test_extract_error_message_fallback_to_stdout_last_line():
93+
"""Test fallback to last line of stdout when no ERROR/WARN markers."""
94+
stdout = "Some generic output\nMore details here\nFinal error message"
95+
stderr = ""
96+
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
97+
assert result == "Final error message"
98+
99+
100+
def test_extract_error_message_fallback_to_stderr_last_line():
101+
"""Test fallback to last line of stderr when no ERROR/WARN markers and no stdout."""
102+
stdout = ""
103+
stderr = "Line 1\nLine 2\nFinal error message"
104+
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
105+
assert result == "Final error message"
106+
107+
108+
def test_extract_error_message_cleans_debug_prefix():
109+
"""Test that debug prefixes like 'P00 ERROR:' are cleaned up."""
110+
stdout = "2025-11-07 07:21:11.120 P00 ERROR: test error message"
111+
stderr = ""
112+
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
113+
assert result == "ERROR: test error message"
114+
115+
40116
def test_stanza_name(harness):
41117
assert (
42118
harness.charm.backup.stanza_name

0 commit comments

Comments
 (0)