diff --git a/libcxx/test/selftest/dsl/lit.local.cfg b/libcxx/test/selftest/dsl/lit.local.cfg index dc6887ad7e48b..73e1c4db9ca4e 100644 --- a/libcxx/test/selftest/dsl/lit.local.cfg +++ b/libcxx/test/selftest/dsl/lit.local.cfg @@ -10,6 +10,6 @@ # within the test. import base64, lit.util, pickle -base64Encode = lambda s: lit.util.to_string(base64.b64encode(lit.util.to_bytes(s))) +base64Encode = lambda s: base64.b64encode(s).decode("utf-8") escapedSubstitutions = base64Encode(pickle.dumps(config.substitutions)) config.substitutions.append(("%{substitutions}", escapedSubstitutions)) diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index 64148c6098327..1b0853b772ea9 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -21,7 +21,6 @@ import lit.ShUtil as ShUtil import lit.Test as Test import lit.util -from lit.util import to_bytes, to_string, to_unicode from lit.BooleanExpression import BooleanExpression @@ -391,18 +390,14 @@ def executeBuiltinEcho(cmd, shenv): # Some tests have un-redirected echo commands to help debug test failures. # Buffer our output and return it to the caller. is_redirected = True - encode = lambda x: x if stdout == subprocess.PIPE: is_redirected = False stdout = StringIO() elif kIsWindows: - # Reopen stdout in binary mode to avoid CRLF translation. The versions - # of echo we are replacing on Windows all emit plain LF, and the LLVM - # tests now depend on this. - # When we open as binary, however, this also means that we have to write - # 'bytes' objects to stdout instead of 'str' objects. - encode = lit.util.to_bytes - stdout = open(stdout.name, stdout.mode + "b") + # Reopen stdout with `newline=""` to avoid CRLF translation. + # The versions of echo we are replacing on Windows all emit plain LF, + # and the LLVM tests now depend on this. + stdout = open(stdout.name, stdout.mode, encoding="utf-8", newline="") opened_files.append((None, None, stdout, None)) # Implement echo flags. We only support -e and -n, and not yet in @@ -423,16 +418,15 @@ def maybeUnescape(arg): if not interpret_escapes: return arg - arg = lit.util.to_bytes(arg) - return arg.decode("unicode_escape") + return arg.encode("utf-8").decode("unicode_escape") if args: for arg in args[:-1]: - stdout.write(encode(maybeUnescape(arg))) - stdout.write(encode(" ")) - stdout.write(encode(maybeUnescape(args[-1]))) + stdout.write(maybeUnescape(arg)) + stdout.write(" ") + stdout.write(maybeUnescape(args[-1])) if write_newline: - stdout.write(encode("\n")) + stdout.write("\n") for (name, mode, f, path) in opened_files: f.close() @@ -463,7 +457,7 @@ def executeBuiltinMkdir(cmd, cmd_shenv): exitCode = 0 for dir in args: dir = pathlib.Path(dir) - cwd = pathlib.Path(to_unicode(cmd_shenv.cwd)) + cwd = pathlib.Path(cmd_shenv.cwd) if not dir.is_absolute(): dir = lit.util.abs_path_preserve_drive(cwd / dir) if parent: @@ -508,8 +502,6 @@ def on_rm_error(func, path, exc_info): exitCode = 0 for path in args: cwd = cmd_shenv.cwd - path = to_unicode(path) if kIsWindows else to_bytes(path) - cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd) if not os.path.isabs(path): path = lit.util.abs_path_preserve_drive(os.path.join(cwd, path)) if force and not os.path.exists(path): @@ -718,10 +710,7 @@ def processRedirects(cmd, stdin_source, cmd_shenv, opened_files): else: # Make sure relative paths are relative to the cwd. redir_filename = os.path.join(cmd_shenv.cwd, name) - redir_filename = ( - to_unicode(redir_filename) if kIsWindows else to_bytes(redir_filename) - ) - fd = open(redir_filename, mode) + fd = open(redir_filename, mode, encoding="utf-8") # Workaround a Win32 and/or subprocess bug when appending. # # FIXME: Actually, this is probably an instance of PR6753. @@ -1083,14 +1072,14 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): if out is None: out = "" else: - out = to_string(out.decode("utf-8", errors="replace")) + out = out.decode("utf-8", errors="replace") except: out = str(out) try: if err is None: err = "" else: - err = to_string(err.decode("utf-8", errors="replace")) + err = err.decode("utf-8", errors="replace") except: err = str(err) @@ -1282,7 +1271,7 @@ def executeScriptInternal( # Add the command output, if redirected. for (name, path, data) in result.outputFiles: - data = to_string(data.decode("utf-8", errors="replace")) + data = data.decode("utf-8", errors="replace") out += formatOutput(f"redirected output from '{name}'", data, limit=1024) if result.stdout.strip(): out += formatOutput("command stdout", result.stdout) @@ -1338,13 +1327,6 @@ def executeScript(test, litConfig, tmpBase, commands, cwd): script += ".bat" # Write script file - mode = "w" - open_kwargs = {} - if litConfig.isWindows and not isWin32CMDEXE: - mode += "b" # Avoid CRLFs when writing bash scripts. - else: - open_kwargs["encoding"] = "utf-8" - f = open(script, mode, **open_kwargs) if isWin32CMDEXE: for i, ln in enumerate(commands): match = re.fullmatch(kPdbgRegex, ln) @@ -1353,8 +1335,9 @@ def executeScript(test, litConfig, tmpBase, commands, cwd): commands[i] = match.expand( "echo '\\1' > nul && " if command else "echo '\\1' > nul" ) - f.write("@echo on\n") - f.write("\n@if %ERRORLEVEL% NEQ 0 EXIT\n".join(commands)) + with open(script, "w", encoding="utf-8") as f: + f.write("@echo on\n") + f.write("\n@if %ERRORLEVEL% NEQ 0 EXIT\n".join(commands)) else: for i, ln in enumerate(commands): match = re.fullmatch(kPdbgRegex, ln) @@ -1393,8 +1376,6 @@ def executeScript(test, litConfig, tmpBase, commands, cwd): # seen the latter manage to terminate the shell running lit. if command: commands[i] += f" && {{ {command}; }}" - if test.config.pipefail: - f.write(b"set -o pipefail;" if mode == "wb" else "set -o pipefail;") # Manually export any DYLD_* variables used by dyld on macOS because # otherwise they are lost when the shell executable is run, before the @@ -1404,14 +1385,14 @@ def executeScript(test, litConfig, tmpBase, commands, cwd): for k, v in test.config.environment.items() if k.startswith("DYLD_") ) - f.write(bytes(env_str, "utf-8") if mode == "wb" else env_str) - f.write(b"set -x;" if mode == "wb" else "set -x;") - if mode == "wb": - f.write(bytes("{ " + "; } &&\n{ ".join(commands) + "; }", "utf-8")) - else: + + with open(script, "w", encoding="utf-8", newline="") as f: + if test.config.pipefail: + f.write("set -o pipefail;") + f.write(env_str) + f.write("set -x;") f.write("{ " + "; } &&\n{ ".join(commands) + "; }") - f.write(b"\n" if mode == "wb" else "\n") - f.close() + f.write("\n") if isWin32CMDEXE: command = ["cmd", "/c", script] @@ -1445,19 +1426,11 @@ def parseIntegratedTestScriptCommands(source_path, keywords): (line_number, command_type, line). """ - # This code is carefully written to be dual compatible with Python 2.5+ and - # Python 3 without requiring input files to always have valid codings. The - # trick we use is to open the file in binary mode and use the regular - # expression library to find the commands, with it scanning strings in - # Python2 and bytes in Python3. - # - # Once we find a match, we do require each script line to be decodable to - # UTF-8, so we convert the outputs to UTF-8 before returning. This way the - # remaining code can work with "strings" agnostic of the executing Python - # version. + # We use `bytes` for scanning input files to avoid requiring them to always + # have valid codings. keywords_re = re.compile( - to_bytes("(%s)(.*)\n" % ("|".join(re.escape(k) for k in keywords),)) + b"(%s)(.*)\n" % (b"|".join(re.escape(k.encode("utf-8")) for k in keywords),) ) f = open(source_path, "rb") @@ -1466,8 +1439,8 @@ def parseIntegratedTestScriptCommands(source_path, keywords): data = f.read() # Ensure the data ends with a newline. - if not data.endswith(to_bytes("\n")): - data = data + to_bytes("\n") + if not data.endswith(b"\n"): + data = data + b"\n" # Iterate over the matches. line_number = 1 @@ -1476,15 +1449,11 @@ def parseIntegratedTestScriptCommands(source_path, keywords): # Compute the updated line number by counting the intervening # newlines. match_position = match.start() - line_number += data.count( - to_bytes("\n"), last_match_position, match_position - ) + line_number += data.count(b"\n", last_match_position, match_position) last_match_position = match_position # Convert the keyword and line to UTF-8 strings and yield the - # command. Note that we take care to return regular strings in - # Python 2, to avoid other code having to differentiate between the - # str and unicode types. + # command. # # Opening the file in binary mode prevented Windows \r newline # characters from being converted to Unix \n newlines, so manually @@ -1492,8 +1461,8 @@ def parseIntegratedTestScriptCommands(source_path, keywords): keyword, ln = match.groups() yield ( line_number, - to_string(keyword.decode("utf-8")), - to_string(ln.decode("utf-8").rstrip("\r")), + keyword.decode("utf-8"), + ln.decode("utf-8").rstrip("\r"), ) finally: f.close() diff --git a/llvm/utils/lit/lit/builtin_commands/diff.py b/llvm/utils/lit/lit/builtin_commands/diff.py index f2b5869b35889..a32a31d50ada8 100644 --- a/llvm/utils/lit/lit/builtin_commands/diff.py +++ b/llvm/utils/lit/lit/builtin_commands/diff.py @@ -8,7 +8,6 @@ import sys import util -from util import to_string class DiffFlags: @@ -67,10 +66,9 @@ def compareTwoBinaryFiles(flags, filepaths, filelines): filepaths[1].encode(), n=flags.num_context_lines, ) - diffs = [diff.decode(errors="backslashreplace") for diff in diffs] for diff in diffs: - sys.stdout.write(to_string(diff)) + sys.stdout.write(diff.decode(errors="backslashreplace")) exitCode = 1 return exitCode @@ -117,7 +115,7 @@ def compose2(f, g): filepaths[1], n=flags.num_context_lines, ): - sys.stdout.write(to_string(diff)) + sys.stdout.write(diff) exitCode = 1 return exitCode diff --git a/llvm/utils/lit/lit/formats/googletest.py b/llvm/utils/lit/lit/formats/googletest.py index 172cd0beee4a1..01820da38c954 100644 --- a/llvm/utils/lit/lit/formats/googletest.py +++ b/llvm/utils/lit/lit/formats/googletest.py @@ -43,7 +43,7 @@ def get_num_tests(self, path, litConfig, localConfig): return None return sum( map( - lambda line: lit.util.to_string(line).startswith(" "), + lambda line: line.startswith(b" "), out.splitlines(False), ) ) diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py index 913ba69d63328..497009848b563 100644 --- a/llvm/utils/lit/lit/llvm/config.py +++ b/llvm/utils/lit/lit/llvm/config.py @@ -223,7 +223,7 @@ def _find_git_windows_unix_tools(self, tools_needed): continue # We found it, stop enumerating. - return lit.util.to_string(candidate_path) + return candidate_path except: continue @@ -284,8 +284,8 @@ def get_process_output(self, command): env=self.config.environment, ) stdout, stderr = cmd.communicate() - stdout = lit.util.to_string(stdout) - stderr = lit.util.to_string(stderr) + stdout = stdout.decode("utf-8", errors="replace") + stderr = stderr.decode("utf-8", errors="replace") return (stdout, stderr) except OSError: self.lit_config.fatal("Could not run process %s" % command) diff --git a/llvm/utils/lit/lit/reports.py b/llvm/utils/lit/lit/reports.py index 1b43ab9357b37..6f8a782a40aa8 100755 --- a/llvm/utils/lit/lit/reports.py +++ b/llvm/utils/lit/lit/reports.py @@ -29,10 +29,10 @@ def write_results(self, tests, elapsed): fd, _ = tempfile.mkstemp( suffix=ext, prefix=f"{filename}.", dir=os.path.dirname(self.output_file) ) - report_file = os.fdopen(fd, "w") + report_file = os.fdopen(fd, "w", encoding="utf-8") else: # Overwrite if the results already exist. - report_file = open(self.output_file, "w") + report_file = open(self.output_file, "w", encoding="utf-8") with report_file: self._write_results_to_file(tests, elapsed, report_file) diff --git a/llvm/utils/lit/lit/util.py b/llvm/utils/lit/lit/util.py index e4e031b3e0898..7815c361a8b7d 100644 --- a/llvm/utils/lit/lit/util.py +++ b/llvm/utils/lit/lit/util.py @@ -33,76 +33,6 @@ def make_word_regex(word): return r"\b" + word + r"\b" -def to_bytes(s): - """Return the parameter as type 'bytes', possibly encoding it. - - In Python2, the 'bytes' type is the same as 'str'. In Python3, they - are distinct. - - """ - if isinstance(s, bytes): - # In Python2, this branch is taken for both 'str' and 'bytes'. - # In Python3, this branch is taken only for 'bytes'. - return s - # In Python2, 's' is a 'unicode' object. - # In Python3, 's' is a 'str' object. - # Encode to UTF-8 to get 'bytes' data. - return s.encode("utf-8") - - -def to_string(b): - """Return the parameter as type 'str', possibly encoding it. - - In Python2, the 'str' type is the same as 'bytes'. In Python3, the - 'str' type is (essentially) Python2's 'unicode' type, and 'bytes' is - distinct. - - """ - if isinstance(b, str): - # In Python2, this branch is taken for types 'str' and 'bytes'. - # In Python3, this branch is taken only for 'str'. - return b - if isinstance(b, bytes): - # In Python2, this branch is never taken ('bytes' is handled as 'str'). - # In Python3, this is true only for 'bytes'. - try: - return b.decode("utf-8") - except UnicodeDecodeError: - # If the value is not valid Unicode, return the default - # repr-line encoding. - return str(b) - - # By this point, here's what we *don't* have: - # - # - In Python2: - # - 'str' or 'bytes' (1st branch above) - # - In Python3: - # - 'str' (1st branch above) - # - 'bytes' (2nd branch above) - # - # The last type we might expect is the Python2 'unicode' type. There is no - # 'unicode' type in Python3 (all the Python3 cases were already handled). In - # order to get a 'str' object, we need to encode the 'unicode' object. - try: - return b.encode("utf-8") - except AttributeError: - raise TypeError("not sure how to convert %s to %s" % (type(b), str)) - - -def to_unicode(s): - """Return the parameter as type which supports unicode, possibly decoding - it. - - In Python2, this is the unicode type. In Python3 it's the str type. - - """ - if isinstance(s, bytes): - # In Python2, this branch is taken for both 'str' and 'bytes'. - # In Python3, this branch is taken only for 'bytes'. - return s.decode("utf-8") - return s - - def usable_core_count(): """Return the number of cores the current process can use, if supported. Otherwise, return the total number of cores (like `os.cpu_count()`). @@ -341,7 +271,7 @@ def executeCommand( """ if input is not None: - input = to_bytes(input) + input = input.encode("utf-8") err_out = subprocess.STDOUT if redirect_stderr else subprocess.PIPE p = subprocess.Popen( command, @@ -377,8 +307,8 @@ def killProcess(): timerObject.cancel() # Ensure the resulting output is always of string type. - out = to_string(out) - err = "" if redirect_stderr else to_string(err) + out = out.decode("utf-8", errors="replace") + err = "" if redirect_stderr else err.decode("utf-8", errors="replace") if hitTimeOut[0]: raise ExecuteCommandTimeoutException(