Skip to content

Commit d3a46b6

Browse files
committed
Reapply "Standardize pgBackRest logging to use stderr consistently"
This reverts commit 1c3eb04. Signed-off-by: Marcelo Henrique Neppel <marcelo.neppel@canonical.com>
1 parent a4e61fc commit d3a46b6

File tree

3 files changed

+60
-66
lines changed

3 files changed

+60
-66
lines changed

src/backups.py

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
PGBACKREST_CONF_PATH,
4040
PGBACKREST_CONFIGURATION_FILE,
4141
PGBACKREST_EXECUTABLE,
42+
PGBACKREST_LOG_LEVEL_STDERR,
4243
PGBACKREST_LOGROTATE_FILE,
4344
PGBACKREST_LOGS_PATH,
4445
POSTGRESQL_DATA_PATH,
@@ -184,7 +185,13 @@ def can_use_s3_repository(self) -> tuple[bool, str]:
184185

185186
try:
186187
return_code, stdout, stderr = self._execute_command(
187-
[PGBACKREST_EXECUTABLE, PGBACKREST_CONFIGURATION_FILE, "info", "--output=json"],
188+
[
189+
PGBACKREST_EXECUTABLE,
190+
PGBACKREST_CONFIGURATION_FILE,
191+
PGBACKREST_LOG_LEVEL_STDERR,
192+
"info",
193+
"--output=json",
194+
],
188195
timeout=30,
189196
)
190197
except TimeoutExpired as e:
@@ -196,7 +203,7 @@ def can_use_s3_repository(self) -> tuple[bool, str]:
196203

197204
else:
198205
if return_code != 0:
199-
extracted_error = self._extract_error_message(stdout, stderr)
206+
extracted_error = self._extract_error_message(stderr)
200207
logger.error(f"Failed to run pgbackrest: {extracted_error}")
201208
return False, FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE
202209

@@ -360,23 +367,25 @@ def _execute_command(
360367
return process.returncode, process.stdout.decode(), process.stderr.decode()
361368

362369
@staticmethod
363-
def _extract_error_message(stdout: str, stderr: str) -> str:
364-
"""Extract key error message from pgBackRest output.
370+
def _extract_error_message(stderr: str) -> str:
371+
"""Extract key error message from pgBackRest stderr output.
372+
373+
Since we standardize all pgBackRest commands to use --log-level-stderr=warn,
374+
all errors and warnings are consistently written to stderr. This makes error
375+
extraction predictable and avoids potential log duplication issues.
365376
366377
Args:
367-
stdout: Standard output from pgBackRest command.
368-
stderr: Standard error from pgBackRest command.
378+
stderr: Standard error from pgBackRest command containing errors/warnings.
369379
370380
Returns:
371-
Extracted error message, prioritizing ERROR/WARN lines from output.
381+
Extracted error message from stderr, prioritizing ERROR/WARN lines.
372382
"""
373-
combined_output = f"{stdout}\n{stderr}".strip()
374-
if not combined_output:
383+
if not stderr.strip():
375384
return f"Unknown error occurred. Please check the logs at {PGBACKREST_LOGS_PATH}"
376385

377-
# Extract lines with ERROR or WARN markers from pgBackRest output
386+
# Extract lines with ERROR or WARN markers from pgBackRest stderr output
378387
error_lines = []
379-
for line in combined_output.splitlines():
388+
for line in stderr.splitlines():
380389
if "ERROR:" in line or "WARN:" in line:
381390
# Clean up the line by removing debug prefixes like "P00 ERROR:"
382391
cleaned = re.sub(r"^.*?(ERROR:|WARN:)", r"\1", line).strip()
@@ -386,13 +395,8 @@ def _extract_error_message(stdout: str, stderr: str) -> str:
386395
if error_lines:
387396
return "; ".join(error_lines)
388397

389-
# Otherwise return the last non-empty line from stderr or stdout
390-
if stderr.strip():
391-
return stderr.strip().splitlines()[-1]
392-
if stdout.strip():
393-
return stdout.strip().splitlines()[-1]
394-
395-
return f"Unknown error occurred. Please check the logs at {PGBACKREST_LOGS_PATH}"
398+
# Otherwise return the last non-empty line from stderr
399+
return stderr.strip().splitlines()[-1]
396400

397401
def _format_backup_list(self, backup_list) -> str:
398402
"""Formats provided list of backups as a table."""
@@ -438,11 +442,12 @@ def _generate_backup_list_output(self) -> str:
438442
return_code, output, stderr = self._execute_command([
439443
PGBACKREST_EXECUTABLE,
440444
PGBACKREST_CONFIGURATION_FILE,
445+
PGBACKREST_LOG_LEVEL_STDERR,
441446
"info",
442447
"--output=json",
443448
])
444449
if return_code != 0:
445-
extracted_error = self._extract_error_message(output, stderr)
450+
extracted_error = self._extract_error_message(stderr)
446451
raise ListBackupsError(f"Failed to list backups with error: {extracted_error}")
447452

448453
backups = json.loads(output)[0]["backup"]
@@ -510,11 +515,12 @@ def _list_backups(self, show_failed: bool, parse=True) -> dict[str, tuple[str, s
510515
return_code, output, stderr = self._execute_command([
511516
PGBACKREST_EXECUTABLE,
512517
PGBACKREST_CONFIGURATION_FILE,
518+
PGBACKREST_LOG_LEVEL_STDERR,
513519
"info",
514520
"--output=json",
515521
])
516522
if return_code != 0:
517-
extracted_error = self._extract_error_message(output, stderr)
523+
extracted_error = self._extract_error_message(stderr)
518524
raise ListBackupsError(f"Failed to list backups with error: {extracted_error}")
519525

520526
repository_info = next(iter(json.loads(output)), None)
@@ -545,12 +551,13 @@ def _list_timelines(self) -> dict[str, tuple[str, str]]:
545551
return_code, output, stderr = self._execute_command([
546552
PGBACKREST_EXECUTABLE,
547553
PGBACKREST_CONFIGURATION_FILE,
554+
PGBACKREST_LOG_LEVEL_STDERR,
548555
"repo-ls",
549556
"--recurse",
550557
"--output=json",
551558
])
552559
if return_code != 0:
553-
extracted_error = self._extract_error_message(output, stderr)
560+
extracted_error = self._extract_error_message(stderr)
554561
raise ListBackupsError(f"Failed to list repository with error: {extracted_error}")
555562

556563
repository = json.loads(output).items()
@@ -663,6 +670,7 @@ def _initialise_stanza(self, event: HookEvent) -> bool:
663670
return_code, _, stderr = self._execute_command([
664671
PGBACKREST_EXECUTABLE,
665672
PGBACKREST_CONFIGURATION_FILE,
673+
PGBACKREST_LOG_LEVEL_STDERR,
666674
f"--stanza={self.stanza_name}",
667675
"stanza-create",
668676
])
@@ -721,14 +729,15 @@ def check_stanza(self) -> bool:
721729
return_code, stdout, stderr = self._execute_command([
722730
PGBACKREST_EXECUTABLE,
723731
PGBACKREST_CONFIGURATION_FILE,
732+
PGBACKREST_LOG_LEVEL_STDERR,
724733
f"--stanza={self.stanza_name}",
725734
"check",
726735
])
727736
if return_code == PGBACKREST_ARCHIVE_TIMEOUT_ERROR_CODE:
728737
# Raise an error if the archive command timeouts, so the user has the possibility
729738
# to fix network issues and call juju resolve to re-trigger the hook that calls
730739
# this method.
731-
extracted_error = self._extract_error_message(stdout, stderr)
740+
extracted_error = self._extract_error_message(stderr)
732741
logger.error(
733742
f"error: {extracted_error} - please fix the error and call juju resolve on this unit"
734743
)
@@ -787,12 +796,13 @@ def _is_primary_pgbackrest_service_running(self) -> bool:
787796
return False
788797
return_code, stdout, stderr = self._execute_command([
789798
PGBACKREST_EXECUTABLE,
799+
PGBACKREST_LOG_LEVEL_STDERR,
790800
"server-ping",
791801
"--io-timeout=10",
792802
self.charm.primary_endpoint,
793803
])
794804
if return_code != 0:
795-
extracted_error = self._extract_error_message(stdout, stderr)
805+
extracted_error = self._extract_error_message(stderr)
796806
logger.warning(
797807
f"Failed to contact pgBackRest TLS server on {self.charm.primary_endpoint} with error {extracted_error}"
798808
)
@@ -984,8 +994,8 @@ def _run_backup(
984994
command = [
985995
PGBACKREST_EXECUTABLE,
986996
PGBACKREST_CONFIGURATION_FILE,
997+
PGBACKREST_LOG_LEVEL_STDERR,
987998
f"--stanza={self.stanza_name}",
988-
"--log-level-console=debug",
989999
f"--type={BACKUP_TYPE_OVERRIDES[backup_type]}",
9901000
"backup",
9911001
]
@@ -1020,7 +1030,7 @@ def _run_backup(
10201030
f"backup/{self.stanza_name}/{backup_id}/backup.log",
10211031
s3_parameters,
10221032
)
1023-
extracted_error = self._extract_error_message(stdout, stderr)
1033+
extracted_error = self._extract_error_message(stderr)
10241034
error_message = f"Failed to backup PostgreSQL with error: {extracted_error}"
10251035
logger.error(f"Backup failed: {error_message}")
10261036
event.fail(error_message)
@@ -1188,7 +1198,7 @@ def _on_restore_action(self, event): # noqa: C901
11881198
timeout=10,
11891199
)
11901200
if return_code != 0:
1191-
extracted_error = self._extract_error_message(stdout, stderr)
1201+
extracted_error = self._extract_error_message(stderr)
11921202
error_message = (
11931203
f"Failed to remove previous cluster information with error: {extracted_error}"
11941204
)

src/constants.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@
2626

2727
# Snap constants.
2828
PGBACKREST_EXECUTABLE = "charmed-postgresql.pgbackrest"
29+
# pgBackRest logging configuration
30+
# We use stderr for all error/warning output to have a consistent, predictable error extraction
31+
# mechanism. By default, pgBackRest uses stdout (console) for warnings, but we standardize on
32+
# stderr to avoid potential log duplication and to make error handling more reliable.
33+
# Reference: https://pgbackrest.org/configuration.html#section-log
34+
PGBACKREST_LOG_LEVEL_STDERR = "--log-level-stderr=warn"
2935
# pgBackRest error codes
3036
PGBACKREST_ARCHIVE_TIMEOUT_ERROR_CODE = (
3137
82 # Archive timeout - unable to archive WAL files within configured timeout period

tests/unit/test_backups.py

Lines changed: 18 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -37,79 +37,57 @@ def harness():
3737
harness.cleanup()
3838

3939

40-
def test_extract_error_message_with_error_in_stdout():
41-
"""Test extracting error from stdout with ERROR marker."""
42-
stdout = """2025-11-07 07:21:11.120 P00 ERROR: [056]: unable to find primary cluster - cannot proceed
40+
def test_extract_error_message_with_error_in_stderr():
41+
"""Test extracting error from stderr with ERROR marker."""
42+
stderr = """2025-11-07 07:21:11.120 P00 ERROR: [056]: unable to find primary cluster - cannot proceed
4343
HINT: are all available clusters in recovery?"""
44-
stderr = ""
45-
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
44+
result = PostgreSQLBackups._extract_error_message(stderr)
4645
assert result == "ERROR: [056]: unable to find primary cluster - cannot proceed"
4746

4847

49-
def test_extract_error_message_with_error_in_stderr():
48+
def test_extract_error_message_with_plain_stderr():
5049
"""Test extracting error from stderr when no ERROR marker."""
51-
stdout = ""
5250
stderr = "Connection refused: cannot connect to S3"
53-
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
51+
result = PostgreSQLBackups._extract_error_message(stderr)
5452
assert result == "Connection refused: cannot connect to S3"
5553

5654

57-
def test_extract_error_message_with_both_stdout_and_stderr():
58-
"""Test extracting error when ERROR marker is in stdout and stderr has additional info."""
59-
stdout = "P00 ERROR: database error occurred"
60-
stderr = "Additional stderr info"
61-
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
62-
assert result == "ERROR: database error occurred"
63-
64-
6555
def test_extract_error_message_with_warning():
66-
"""Test extracting warning message from output."""
67-
stdout = "P00 WARN: configuration issue detected"
68-
stderr = ""
69-
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
56+
"""Test extracting warning message from stderr."""
57+
stderr = "P00 WARN: configuration issue detected"
58+
result = PostgreSQLBackups._extract_error_message(stderr)
7059
assert result == "WARN: configuration issue detected"
7160

7261

7362
def test_extract_error_message_with_multiple_errors():
74-
"""Test extracting multiple ERROR/WARN lines."""
75-
stdout = """P00 ERROR: first error
63+
"""Test extracting multiple ERROR/WARN lines from stderr."""
64+
stderr = """P00 ERROR: first error
7665
P00 WARN: warning message
7766
P00 ERROR: second error"""
78-
stderr = ""
79-
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
67+
result = PostgreSQLBackups._extract_error_message(stderr)
8068
assert result == "ERROR: first error; WARN: warning message; ERROR: second error"
8169

8270

8371
def test_extract_error_message_with_empty_output():
84-
"""Test with empty stdout and stderr returns helpful message."""
85-
result = PostgreSQLBackups._extract_error_message("", "")
72+
"""Test with empty stderr returns helpful message."""
73+
result = PostgreSQLBackups._extract_error_message("")
8674
assert (
8775
result
8876
== "Unknown error occurred. Please check the logs at /var/snap/charmed-postgresql/common/var/log/pgbackrest"
8977
)
9078

9179

92-
def test_extract_error_message_fallback_to_stdout_last_line():
93-
"""Test fallback to last line of stdout when no ERROR/WARN markers."""
94-
stdout = "Some generic output\nMore details here\nFinal error message"
95-
stderr = ""
96-
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
97-
assert result == "Final error message"
98-
99-
10080
def test_extract_error_message_fallback_to_stderr_last_line():
101-
"""Test fallback to last line of stderr when no ERROR/WARN markers and no stdout."""
102-
stdout = ""
81+
"""Test fallback to last line of stderr when no ERROR/WARN markers."""
10382
stderr = "Line 1\nLine 2\nFinal error message"
104-
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
83+
result = PostgreSQLBackups._extract_error_message(stderr)
10584
assert result == "Final error message"
10685

10786

10887
def test_extract_error_message_cleans_debug_prefix():
10988
"""Test that debug prefixes like 'P00 ERROR:' are cleaned up."""
110-
stdout = "2025-11-07 07:21:11.120 P00 ERROR: test error message"
111-
stderr = ""
112-
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
89+
stderr = "2025-11-07 07:21:11.120 P00 ERROR: test error message"
90+
result = PostgreSQLBackups._extract_error_message(stderr)
11391
assert result == "ERROR: test error message"
11492

11593

0 commit comments

Comments
 (0)