Skip to content

Commit 9db9b1e

Browse files
committed
Standardize pgBackRest logging to use stderr consistently
Addresses code review feedback from PR #1320 (review #3577653265) about potential log duplication when checking both stdout and stderr for errors. Changes: - Add PGBACKREST_LOG_LEVEL_STDERR constant (--log-level-stderr=warn) to ensure all pgBackRest commands output errors/warnings to stderr - Update all 9 pgBackRest command invocations to include the new flag - Simplify _extract_error_message() to only check stderr (removes stdout parameter entirely) - Update all 8 call sites and 7 unit tests to use new single-parameter API - Remove redundant test for stdout handling This approach eliminates the risk of log duplication while maintaining predictable error extraction. All errors and warnings now consistently appear in stderr, making error handling more reliable. Reference: https://pgbackrest.org/configuration.html#section-log
1 parent 1382039 commit 9db9b1e

File tree

3 files changed

+60
-66
lines changed

3 files changed

+60
-66
lines changed

src/backups.py

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
PGBACKREST_CONF_PATH,
4040
PGBACKREST_CONFIGURATION_FILE,
4141
PGBACKREST_EXECUTABLE,
42+
PGBACKREST_LOG_LEVEL_STDERR,
4243
PGBACKREST_LOGROTATE_FILE,
4344
PGBACKREST_LOGS_PATH,
4445
POSTGRESQL_DATA_PATH,
@@ -184,7 +185,13 @@ def can_use_s3_repository(self) -> tuple[bool, str]:
184185

185186
try:
186187
return_code, stdout, stderr = self._execute_command(
187-
[PGBACKREST_EXECUTABLE, PGBACKREST_CONFIGURATION_FILE, "info", "--output=json"],
188+
[
189+
PGBACKREST_EXECUTABLE,
190+
PGBACKREST_CONFIGURATION_FILE,
191+
PGBACKREST_LOG_LEVEL_STDERR,
192+
"info",
193+
"--output=json",
194+
],
188195
timeout=30,
189196
)
190197
except TimeoutExpired as e:
@@ -196,7 +203,7 @@ def can_use_s3_repository(self) -> tuple[bool, str]:
196203

197204
else:
198205
if return_code != 0:
199-
extracted_error = self._extract_error_message(stdout, stderr)
206+
extracted_error = self._extract_error_message(stderr)
200207
logger.error(f"Failed to run pgbackrest: {extracted_error}")
201208
return False, FAILED_TO_INITIALIZE_STANZA_ERROR_MESSAGE
202209

@@ -360,23 +367,25 @@ def _execute_command(
360367
return process.returncode, process.stdout.decode(), process.stderr.decode()
361368

362369
@staticmethod
363-
def _extract_error_message(stdout: str, stderr: str) -> str:
364-
"""Extract key error message from pgBackRest output.
370+
def _extract_error_message(stderr: str) -> str:
371+
"""Extract key error message from pgBackRest stderr output.
372+
373+
Since we standardize all pgBackRest commands to use --log-level-stderr=warn,
374+
all errors and warnings are consistently written to stderr. This makes error
375+
extraction predictable and avoids potential log duplication issues.
365376
366377
Args:
367-
stdout: Standard output from pgBackRest command.
368-
stderr: Standard error from pgBackRest command.
378+
stderr: Standard error from pgBackRest command containing errors/warnings.
369379
370380
Returns:
371-
Extracted error message, prioritizing ERROR/WARN lines from output.
381+
Extracted error message from stderr, prioritizing ERROR/WARN lines.
372382
"""
373-
combined_output = f"{stdout}\n{stderr}".strip()
374-
if not combined_output:
383+
if not stderr.strip():
375384
return f"Unknown error occurred. Please check the logs at {PGBACKREST_LOGS_PATH}"
376385

377-
# Extract lines with ERROR or WARN markers from pgBackRest output
386+
# Extract lines with ERROR or WARN markers from pgBackRest stderr output
378387
error_lines = []
379-
for line in combined_output.splitlines():
388+
for line in stderr.splitlines():
380389
if "ERROR:" in line or "WARN:" in line:
381390
# Clean up the line by removing debug prefixes like "P00 ERROR:"
382391
cleaned = re.sub(r"^.*?(ERROR:|WARN:)", r"\1", line).strip()
@@ -386,13 +395,8 @@ def _extract_error_message(stdout: str, stderr: str) -> str:
386395
if error_lines:
387396
return "; ".join(error_lines)
388397

389-
# Otherwise return the last non-empty line from stderr or stdout
390-
if stderr.strip():
391-
return stderr.strip().splitlines()[-1]
392-
if stdout.strip():
393-
return stdout.strip().splitlines()[-1]
394-
395-
return f"Unknown error occurred. Please check the logs at {PGBACKREST_LOGS_PATH}"
398+
# Otherwise return the last non-empty line from stderr
399+
return stderr.strip().splitlines()[-1]
396400

397401
def _format_backup_list(self, backup_list) -> str:
398402
"""Formats provided list of backups as a table."""
@@ -438,11 +442,12 @@ def _generate_backup_list_output(self) -> str:
438442
return_code, output, stderr = self._execute_command([
439443
PGBACKREST_EXECUTABLE,
440444
PGBACKREST_CONFIGURATION_FILE,
445+
PGBACKREST_LOG_LEVEL_STDERR,
441446
"info",
442447
"--output=json",
443448
])
444449
if return_code != 0:
445-
extracted_error = self._extract_error_message(output, stderr)
450+
extracted_error = self._extract_error_message(stderr)
446451
raise ListBackupsError(f"Failed to list backups with error: {extracted_error}")
447452

448453
backups = json.loads(output)[0]["backup"]
@@ -510,11 +515,12 @@ def _list_backups(self, show_failed: bool, parse=True) -> dict[str, tuple[str, s
510515
return_code, output, stderr = self._execute_command([
511516
PGBACKREST_EXECUTABLE,
512517
PGBACKREST_CONFIGURATION_FILE,
518+
PGBACKREST_LOG_LEVEL_STDERR,
513519
"info",
514520
"--output=json",
515521
])
516522
if return_code != 0:
517-
extracted_error = self._extract_error_message(output, stderr)
523+
extracted_error = self._extract_error_message(stderr)
518524
raise ListBackupsError(f"Failed to list backups with error: {extracted_error}")
519525

520526
repository_info = next(iter(json.loads(output)), None)
@@ -545,12 +551,13 @@ def _list_timelines(self) -> dict[str, tuple[str, str]]:
545551
return_code, output, stderr = self._execute_command([
546552
PGBACKREST_EXECUTABLE,
547553
PGBACKREST_CONFIGURATION_FILE,
554+
PGBACKREST_LOG_LEVEL_STDERR,
548555
"repo-ls",
549556
"--recurse",
550557
"--output=json",
551558
])
552559
if return_code != 0:
553-
extracted_error = self._extract_error_message(output, stderr)
560+
extracted_error = self._extract_error_message(stderr)
554561
raise ListBackupsError(f"Failed to list repository with error: {extracted_error}")
555562

556563
repository = json.loads(output).items()
@@ -663,6 +670,7 @@ def _initialise_stanza(self, event: HookEvent) -> bool:
663670
return_code, _, stderr = self._execute_command([
664671
PGBACKREST_EXECUTABLE,
665672
PGBACKREST_CONFIGURATION_FILE,
673+
PGBACKREST_LOG_LEVEL_STDERR,
666674
f"--stanza={self.stanza_name}",
667675
"stanza-create",
668676
])
@@ -721,14 +729,15 @@ def check_stanza(self) -> bool:
721729
return_code, stdout, stderr = self._execute_command([
722730
PGBACKREST_EXECUTABLE,
723731
PGBACKREST_CONFIGURATION_FILE,
732+
PGBACKREST_LOG_LEVEL_STDERR,
724733
f"--stanza={self.stanza_name}",
725734
"check",
726735
])
727736
if return_code == PGBACKREST_ARCHIVE_TIMEOUT_ERROR_CODE:
728737
# Raise an error if the archive command timeouts, so the user has the possibility
729738
# to fix network issues and call juju resolve to re-trigger the hook that calls
730739
# this method.
731-
extracted_error = self._extract_error_message(stdout, stderr)
740+
extracted_error = self._extract_error_message(stderr)
732741
logger.error(
733742
f"error: {extracted_error} - please fix the error and call juju resolve on this unit"
734743
)
@@ -787,12 +796,13 @@ def _is_primary_pgbackrest_service_running(self) -> bool:
787796
return False
788797
return_code, stdout, stderr = self._execute_command([
789798
PGBACKREST_EXECUTABLE,
799+
PGBACKREST_LOG_LEVEL_STDERR,
790800
"server-ping",
791801
"--io-timeout=10",
792802
self.charm.primary_endpoint,
793803
])
794804
if return_code != 0:
795-
extracted_error = self._extract_error_message(stdout, stderr)
805+
extracted_error = self._extract_error_message(stderr)
796806
logger.warning(
797807
f"Failed to contact pgBackRest TLS server on {self.charm.primary_endpoint} with error {extracted_error}"
798808
)
@@ -984,8 +994,8 @@ def _run_backup(
984994
command = [
985995
PGBACKREST_EXECUTABLE,
986996
PGBACKREST_CONFIGURATION_FILE,
997+
PGBACKREST_LOG_LEVEL_STDERR,
987998
f"--stanza={self.stanza_name}",
988-
"--log-level-console=debug",
989999
f"--type={BACKUP_TYPE_OVERRIDES[backup_type]}",
9901000
"backup",
9911001
]
@@ -1020,7 +1030,7 @@ def _run_backup(
10201030
f"backup/{self.stanza_name}/{backup_id}/backup.log",
10211031
s3_parameters,
10221032
)
1023-
extracted_error = self._extract_error_message(stdout, stderr)
1033+
extracted_error = self._extract_error_message(stderr)
10241034
error_message = f"Failed to backup PostgreSQL with error: {extracted_error}"
10251035
logger.error(f"Backup failed: {error_message}")
10261036
event.fail(error_message)
@@ -1188,7 +1198,7 @@ def _on_restore_action(self, event): # noqa: C901
11881198
timeout=10,
11891199
)
11901200
if return_code != 0:
1191-
extracted_error = self._extract_error_message(stdout, stderr)
1201+
extracted_error = self._extract_error_message(stderr)
11921202
error_message = (
11931203
f"Failed to remove previous cluster information with error: {extracted_error}"
11941204
)

src/constants.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@
2626

2727
# Snap constants.
2828
PGBACKREST_EXECUTABLE = "charmed-postgresql.pgbackrest"
29+
# pgBackRest logging configuration
30+
# We use stderr for all error/warning output to have a consistent, predictable error extraction
31+
# mechanism. By default, pgBackRest uses stdout (console) for warnings, but we standardize on
32+
# stderr to avoid potential log duplication and to make error handling more reliable.
33+
# Reference: https://pgbackrest.org/configuration.html#section-log
34+
PGBACKREST_LOG_LEVEL_STDERR = "--log-level-stderr=warn"
2935
# pgBackRest error codes
3036
PGBACKREST_ARCHIVE_TIMEOUT_ERROR_CODE = (
3137
82 # Archive timeout - unable to archive WAL files within configured timeout period

tests/unit/test_backups.py

Lines changed: 18 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -37,79 +37,57 @@ def harness():
3737
harness.cleanup()
3838

3939

40-
def test_extract_error_message_with_error_in_stdout():
41-
"""Test extracting error from stdout with ERROR marker."""
42-
stdout = """2025-11-07 07:21:11.120 P00 ERROR: [056]: unable to find primary cluster - cannot proceed
40+
def test_extract_error_message_with_error_in_stderr():
41+
"""Test extracting error from stderr with ERROR marker."""
42+
stderr = """2025-11-07 07:21:11.120 P00 ERROR: [056]: unable to find primary cluster - cannot proceed
4343
HINT: are all available clusters in recovery?"""
44-
stderr = ""
45-
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
44+
result = PostgreSQLBackups._extract_error_message(stderr)
4645
assert result == "ERROR: [056]: unable to find primary cluster - cannot proceed"
4746

4847

49-
def test_extract_error_message_with_error_in_stderr():
48+
def test_extract_error_message_with_plain_stderr():
5049
"""Test extracting error from stderr when no ERROR marker."""
51-
stdout = ""
5250
stderr = "Connection refused: cannot connect to S3"
53-
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
51+
result = PostgreSQLBackups._extract_error_message(stderr)
5452
assert result == "Connection refused: cannot connect to S3"
5553

5654

57-
def test_extract_error_message_with_both_stdout_and_stderr():
58-
"""Test extracting error when ERROR marker is in stdout and stderr has additional info."""
59-
stdout = "P00 ERROR: database error occurred"
60-
stderr = "Additional stderr info"
61-
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
62-
assert result == "ERROR: database error occurred"
63-
64-
6555
def test_extract_error_message_with_warning():
66-
"""Test extracting warning message from output."""
67-
stdout = "P00 WARN: configuration issue detected"
68-
stderr = ""
69-
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
56+
"""Test extracting warning message from stderr."""
57+
stderr = "P00 WARN: configuration issue detected"
58+
result = PostgreSQLBackups._extract_error_message(stderr)
7059
assert result == "WARN: configuration issue detected"
7160

7261

7362
def test_extract_error_message_with_multiple_errors():
74-
"""Test extracting multiple ERROR/WARN lines."""
75-
stdout = """P00 ERROR: first error
63+
"""Test extracting multiple ERROR/WARN lines from stderr."""
64+
stderr = """P00 ERROR: first error
7665
P00 WARN: warning message
7766
P00 ERROR: second error"""
78-
stderr = ""
79-
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
67+
result = PostgreSQLBackups._extract_error_message(stderr)
8068
assert result == "ERROR: first error; WARN: warning message; ERROR: second error"
8169

8270

8371
def test_extract_error_message_with_empty_output():
84-
"""Test with empty stdout and stderr returns helpful message."""
85-
result = PostgreSQLBackups._extract_error_message("", "")
72+
"""Test with empty stderr returns helpful message."""
73+
result = PostgreSQLBackups._extract_error_message("")
8674
assert (
8775
result
8876
== "Unknown error occurred. Please check the logs at /var/snap/charmed-postgresql/common/var/log/pgbackrest"
8977
)
9078

9179

92-
def test_extract_error_message_fallback_to_stdout_last_line():
93-
"""Test fallback to last line of stdout when no ERROR/WARN markers."""
94-
stdout = "Some generic output\nMore details here\nFinal error message"
95-
stderr = ""
96-
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
97-
assert result == "Final error message"
98-
99-
10080
def test_extract_error_message_fallback_to_stderr_last_line():
101-
"""Test fallback to last line of stderr when no ERROR/WARN markers and no stdout."""
102-
stdout = ""
81+
"""Test fallback to last line of stderr when no ERROR/WARN markers."""
10382
stderr = "Line 1\nLine 2\nFinal error message"
104-
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
83+
result = PostgreSQLBackups._extract_error_message(stderr)
10584
assert result == "Final error message"
10685

10786

10887
def test_extract_error_message_cleans_debug_prefix():
10988
"""Test that debug prefixes like 'P00 ERROR:' are cleaned up."""
110-
stdout = "2025-11-07 07:21:11.120 P00 ERROR: test error message"
111-
stderr = ""
112-
result = PostgreSQLBackups._extract_error_message(stdout, stderr)
89+
stderr = "2025-11-07 07:21:11.120 P00 ERROR: test error message"
90+
result = PostgreSQLBackups._extract_error_message(stderr)
11391
assert result == "ERROR: test error message"
11492

11593

0 commit comments

Comments
 (0)