Skip to content
Open
2 changes: 1 addition & 1 deletion libcxx/test/selftest/dsl/lit.local.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@
# within the test.
import base64, lit.util, pickle

base64Encode = lambda s: lit.util.to_string(base64.b64encode(lit.util.to_bytes(s)))
base64Encode = lambda s: base64.b64encode(s).decode("utf-8")
escapedSubstitutions = base64Encode(pickle.dumps(config.substitutions))
config.substitutions.append(("%{substitutions}", escapedSubstitutions))
97 changes: 33 additions & 64 deletions llvm/utils/lit/lit/TestRunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import lit.ShUtil as ShUtil
import lit.Test as Test
import lit.util
from lit.util import to_bytes, to_string, to_unicode
from lit.BooleanExpression import BooleanExpression


Expand Down Expand Up @@ -391,18 +390,14 @@ def executeBuiltinEcho(cmd, shenv):
# Some tests have un-redirected echo commands to help debug test failures.
# Buffer our output and return it to the caller.
is_redirected = True
encode = lambda x: x
if stdout == subprocess.PIPE:
is_redirected = False
stdout = StringIO()
elif kIsWindows:
# Reopen stdout in binary mode to avoid CRLF translation. The versions
# of echo we are replacing on Windows all emit plain LF, and the LLVM
# tests now depend on this.
# When we open as binary, however, this also means that we have to write
# 'bytes' objects to stdout instead of 'str' objects.
encode = lit.util.to_bytes
stdout = open(stdout.name, stdout.mode + "b")
# Reopen stdout with `newline=""` to disable CRLF translation.
# The versions of echo we are replacing on Windows all emit plain LF,
# and the LLVM tests now depend on this.
stdout = open(stdout.name, stdout.mode, encoding="utf-8", newline="")
opened_files.append((None, None, stdout, None))

# Implement echo flags. We only support -e and -n, and not yet in
Expand All @@ -423,16 +418,15 @@ def maybeUnescape(arg):
if not interpret_escapes:
return arg

arg = lit.util.to_bytes(arg)
return arg.decode("unicode_escape")
return arg.encode("utf-8").decode("unicode_escape")

if args:
for arg in args[:-1]:
stdout.write(encode(maybeUnescape(arg)))
stdout.write(encode(" "))
stdout.write(encode(maybeUnescape(args[-1])))
stdout.write(maybeUnescape(arg))
stdout.write(" ")
stdout.write(maybeUnescape(args[-1]))
if write_newline:
stdout.write(encode("\n"))
stdout.write("\n")

for (name, mode, f, path) in opened_files:
f.close()
Expand Down Expand Up @@ -463,7 +457,7 @@ def executeBuiltinMkdir(cmd, cmd_shenv):
exitCode = 0
for dir in args:
dir = pathlib.Path(dir)
cwd = pathlib.Path(to_unicode(cmd_shenv.cwd))
cwd = pathlib.Path(cmd_shenv.cwd)
if not dir.is_absolute():
dir = lit.util.abs_path_preserve_drive(cwd / dir)
if parent:
Expand Down Expand Up @@ -508,8 +502,6 @@ def on_rm_error(func, path, exc_info):
exitCode = 0
for path in args:
cwd = cmd_shenv.cwd
path = to_unicode(path) if kIsWindows else to_bytes(path)
cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd)
if not os.path.isabs(path):
path = lit.util.abs_path_preserve_drive(os.path.join(cwd, path))
if force and not os.path.exists(path):
Expand Down Expand Up @@ -718,10 +710,7 @@ def processRedirects(cmd, stdin_source, cmd_shenv, opened_files):
else:
# Make sure relative paths are relative to the cwd.
redir_filename = os.path.join(cmd_shenv.cwd, name)
redir_filename = (
to_unicode(redir_filename) if kIsWindows else to_bytes(redir_filename)
)
fd = open(redir_filename, mode)
fd = open(redir_filename, mode, encoding="utf-8")
# Work around a Win32 and/or subprocess bug when appending.
#
# FIXME: Actually, this is probably an instance of PR6753.
Expand Down Expand Up @@ -1083,14 +1072,14 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
if out is None:
out = ""
else:
out = to_string(out.decode("utf-8", errors="replace"))
out = out.decode("utf-8", errors="replace")
except:
out = str(out)
try:
if err is None:
err = ""
else:
err = to_string(err.decode("utf-8", errors="replace"))
err = err.decode("utf-8", errors="replace")
except:
err = str(err)

Expand Down Expand Up @@ -1282,7 +1271,7 @@ def executeScriptInternal(

# Add the command output, if redirected.
for (name, path, data) in result.outputFiles:
data = to_string(data.decode("utf-8", errors="replace"))
data = data.decode("utf-8", errors="replace")
out += formatOutput(f"redirected output from '{name}'", data, limit=1024)
if result.stdout.strip():
out += formatOutput("command stdout", result.stdout)
Expand Down Expand Up @@ -1338,13 +1327,6 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
script += ".bat"

# Write script file
mode = "w"
open_kwargs = {}
if litConfig.isWindows and not isWin32CMDEXE:
mode += "b" # Avoid CRLFs when writing bash scripts.
else:
open_kwargs["encoding"] = "utf-8"
f = open(script, mode, **open_kwargs)
if isWin32CMDEXE:
for i, ln in enumerate(commands):
match = re.fullmatch(kPdbgRegex, ln)
Expand All @@ -1353,8 +1335,9 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
commands[i] = match.expand(
"echo '\\1' > nul && " if command else "echo '\\1' > nul"
)
f.write("@echo on\n")
f.write("\n@if %ERRORLEVEL% NEQ 0 EXIT\n".join(commands))
with open(script, "w", encoding="utf-8") as f:
f.write("@echo on\n")
f.write("\n@if %ERRORLEVEL% NEQ 0 EXIT\n".join(commands))
else:
for i, ln in enumerate(commands):
match = re.fullmatch(kPdbgRegex, ln)
Expand Down Expand Up @@ -1393,8 +1376,6 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
# seen the latter manage to terminate the shell running lit.
if command:
commands[i] += f" && {{ {command}; }}"
if test.config.pipefail:
f.write(b"set -o pipefail;" if mode == "wb" else "set -o pipefail;")

# Manually export any DYLD_* variables used by dyld on macOS because
# otherwise they are lost when the shell executable is run, before the
Expand All @@ -1404,14 +1385,14 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
for k, v in test.config.environment.items()
if k.startswith("DYLD_")
)
f.write(bytes(env_str, "utf-8") if mode == "wb" else env_str)
f.write(b"set -x;" if mode == "wb" else "set -x;")
if mode == "wb":
f.write(bytes("{ " + "; } &&\n{ ".join(commands) + "; }", "utf-8"))
else:

with open(script, "w", encoding="utf-8", newline="") as f:
if test.config.pipefail:
f.write("set -o pipefail;")
f.write(env_str)
f.write("set -x;")
f.write("{ " + "; } &&\n{ ".join(commands) + "; }")
f.write(b"\n" if mode == "wb" else "\n")
f.close()
f.write("\n")

if isWin32CMDEXE:
command = ["cmd", "/c", script]
Expand Down Expand Up @@ -1445,19 +1426,11 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
(line_number, command_type, line).
"""

# This code is carefully written to be dual compatible with Python 2.5+ and
# Python 3 without requiring input files to always have valid codings. The
# trick we use is to open the file in binary mode and use the regular
# expression library to find the commands, with it scanning strings in
# Python2 and bytes in Python3.
#
# Once we find a match, we do require each script line to be decodable to
# UTF-8, so we convert the outputs to UTF-8 before returning. This way the
# remaining code can work with "strings" agnostic of the executing Python
# version.
# We scan input files as `bytes` so that they are not required to have a
# valid encoding.

keywords_re = re.compile(
to_bytes("(%s)(.*)\n" % ("|".join(re.escape(k) for k in keywords),))
b"(%s)(.*)\n" % (b"|".join(re.escape(k.encode("utf-8")) for k in keywords),)
)

f = open(source_path, "rb")
Expand All @@ -1466,8 +1439,8 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
data = f.read()

# Ensure the data ends with a newline.
if not data.endswith(to_bytes("\n")):
data = data + to_bytes("\n")
if not data.endswith(b"\n"):
data = data + b"\n"

# Iterate over the matches.
line_number = 1
Expand All @@ -1476,24 +1449,20 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
# Compute the updated line number by counting the intervening
# newlines.
match_position = match.start()
line_number += data.count(
to_bytes("\n"), last_match_position, match_position
)
line_number += data.count(b"\n", last_match_position, match_position)
last_match_position = match_position

# Convert the keyword and line to UTF-8 strings and yield the
# command. Note that we take care to return regular strings in
# Python 2, to avoid other code having to differentiate between the
# str and unicode types.
# command.
#
# Opening the file in binary mode prevented Windows \r newline
# characters from being converted to Unix \n newlines, so manually
# strip those from the yielded lines.
keyword, ln = match.groups()
yield (
line_number,
to_string(keyword.decode("utf-8")),
to_string(ln.decode("utf-8").rstrip("\r")),
keyword.decode("utf-8"),
ln.decode("utf-8").rstrip("\r"),
)
finally:
f.close()
Expand Down
6 changes: 2 additions & 4 deletions llvm/utils/lit/lit/builtin_commands/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import sys

import util
from util import to_string


class DiffFlags:
Expand Down Expand Up @@ -67,10 +66,9 @@ def compareTwoBinaryFiles(flags, filepaths, filelines):
filepaths[1].encode(),
n=flags.num_context_lines,
)
diffs = [diff.decode(errors="backslashreplace") for diff in diffs]

for diff in diffs:
sys.stdout.write(to_string(diff))
sys.stdout.write(diff.decode(errors="backslashreplace"))
exitCode = 1
return exitCode

Expand Down Expand Up @@ -117,7 +115,7 @@ def compose2(f, g):
filepaths[1],
n=flags.num_context_lines,
):
sys.stdout.write(to_string(diff))
sys.stdout.write(diff)
exitCode = 1
return exitCode

Expand Down
2 changes: 1 addition & 1 deletion llvm/utils/lit/lit/formats/googletest.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def get_num_tests(self, path, litConfig, localConfig):
return None
return sum(
map(
lambda line: lit.util.to_string(line).startswith(" "),
lambda line: line.startswith(b" "),
out.splitlines(False),
)
)
Expand Down
6 changes: 3 additions & 3 deletions llvm/utils/lit/lit/llvm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def _find_git_windows_unix_tools(self, tools_needed):
continue

# We found it, stop enumerating.
return lit.util.to_string(candidate_path)
return candidate_path
except:
continue

Expand Down Expand Up @@ -284,8 +284,8 @@ def get_process_output(self, command):
env=self.config.environment,
)
stdout, stderr = cmd.communicate()
stdout = lit.util.to_string(stdout)
stderr = lit.util.to_string(stderr)
stdout = stdout.decode("utf-8", errors="replace")
stderr = stderr.decode("utf-8", errors="replace")
return (stdout, stderr)
except OSError:
self.lit_config.fatal("Could not run process %s" % command)
Expand Down
4 changes: 2 additions & 2 deletions llvm/utils/lit/lit/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ def write_results(self, tests, elapsed):
fd, _ = tempfile.mkstemp(
suffix=ext, prefix=f"{filename}.", dir=os.path.dirname(self.output_file)
)
report_file = os.fdopen(fd, "w")
report_file = os.fdopen(fd, "w", encoding="utf-8")
else:
# Overwrite if the results already exist.
report_file = open(self.output_file, "w")
report_file = open(self.output_file, "w", encoding="utf-8")

with report_file:
self._write_results_to_file(tests, elapsed, report_file)
Expand Down
Loading