Skip to content

Commit e9d2732

Browse files
authored
log.py: improve utf-8 handling, and non-utf-8 output (spack#48005)
1 parent 0352552 commit e9d2732

File tree

2 files changed

+22
-18
lines changed

2 files changed

+22
-18
lines changed

lib/spack/llnl/util/tty/log.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -879,10 +879,13 @@ def _writer_daemon(
879879
write_fd.close()
880880

881881
# 1. Use line buffering (3rd param = 1) since Python 3 has a bug
882-
# that prevents unbuffered text I/O.
883-
# 2. Python 3.x before 3.7 does not open with UTF-8 encoding by default
882+
# that prevents unbuffered text I/O. [needs citation]
883+
# 2. Enforce a UTF-8 interpretation of build process output, with undecodable bytes replaced by U+FFFD (the Unicode replacement character).
884+
# The downside is that the log file will not contain the exact output of the build process.
884885
# 3. closefd=False because Connection has "ownership"
885-
read_file = os.fdopen(read_fd.fileno(), "r", 1, encoding="utf-8", closefd=False)
886+
read_file = os.fdopen(
887+
read_fd.fileno(), "r", 1, encoding="utf-8", errors="replace", closefd=False
888+
)
886889

887890
if stdin_fd:
888891
stdin_file = os.fdopen(stdin_fd.fileno(), closefd=False)
@@ -928,11 +931,7 @@ def _writer_daemon(
928931
try:
929932
while line_count < 100:
930933
# Handle output from the calling process.
931-
try:
932-
line = _retry(read_file.readline)()
933-
except UnicodeDecodeError:
934-
# installs like --test=root gpgme produce non-UTF8 logs
935-
line = "<line lost: output was not encoded as UTF-8>\n"
934+
line = _retry(read_file.readline)()
936935

937936
if not line:
938937
return
@@ -946,6 +945,13 @@ def _writer_daemon(
946945
output_line = clean_line
947946
if filter_fn:
948947
output_line = filter_fn(clean_line)
948+
enc = sys.stdout.encoding
949+
if enc != "utf-8":
950+
# On Python 3.6 and 3.7-3.14 with non-{utf-8,C} locale stdout
951+
# may not be able to handle utf-8 output. We do an inefficient
952+
# dance of re-encoding with errors replaced, so stdout.write
953+
# does not raise.
954+
output_line = output_line.encode(enc, "replace").decode(enc)
949955
sys.stdout.write(output_line)
950956

951957
# Stripped output to log file.

lib/spack/spack/test/llnl/util/tty/log.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,18 +57,16 @@ def test_log_python_output_without_echo(capfd, tmpdir):
5757
assert capfd.readouterr()[0] == ""
5858

5959

60-
def test_log_python_output_with_invalid_utf8(capfd, tmpdir):
61-
with tmpdir.as_cwd():
62-
with log.log_output("foo.txt"):
63-
sys.stdout.buffer.write(b"\xc3\x28\n")
60+
def test_log_python_output_with_invalid_utf8(capfd, tmp_path):
61+
tmp_file = str(tmp_path / "foo.txt")
62+
with log.log_output(tmp_file, echo=True):
63+
sys.stdout.buffer.write(b"\xc3helloworld\n")
6464

65-
expected = b"<line lost: output was not encoded as UTF-8>\n"
66-
with open("foo.txt", "rb") as f:
67-
written = f.read()
68-
assert written == expected
65+
# we should be able to read this as valid utf-8
66+
with open(tmp_file, "r", encoding="utf-8") as f:
67+
assert f.read() == "�helloworld\n"
6968

70-
# nothing on stdout or stderr
71-
assert capfd.readouterr()[0] == ""
69+
assert capfd.readouterr().out == "�helloworld\n"
7270

7371

7472
def test_log_python_output_and_echo_output(capfd, tmpdir):

0 commit comments

Comments
 (0)