2121import lit .ShUtil as ShUtil
2222import lit .Test as Test
2323import lit .util
24- from lit .util import to_bytes
2524from lit .BooleanExpression import BooleanExpression
2625
2726
@@ -1419,19 +1418,11 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
14191418 (line_number, command_type, line).
14201419 """
14211420
1422- # This code is carefully written to be dual compatible with Python 2.5+ and
1423- # Python 3 without requiring input files to always have valid codings. The
1424- # trick we use is to open the file in binary mode and use the regular
1425- # expression library to find the commands, with it scanning strings in
1426- # Python2 and bytes in Python3.
1427- #
1428- # Once we find a match, we do require each script line to be decodable to
1429- # UTF-8, so we convert the outputs to UTF-8 before returning. This way the
1430- # remaining code can work with "strings" agnostic of the executing Python
1431- # version.
1421+ # We use `bytes` for scanning input files to avoid requiring them to always
1422+ # have valid codings.
14321423
14331424 keywords_re = re .compile (
1434- to_bytes ( "(%s)(.*)\n " % ("|" .join (re .escape (k ) for k in keywords ),) )
1425+ b "(%s)(.*)\n " % (b "|" .join (re .escape (k . encode ( "utf-8" )) for k in keywords ),)
14351426 )
14361427
14371428 f = open (source_path , "rb" )
@@ -1440,8 +1431,8 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
14401431 data = f .read ()
14411432
14421433 # Ensure the data ends with a newline.
1443- if not data .endswith (to_bytes ( "\n " ) ):
1444- data = data + to_bytes ( "\n " )
1434+ if not data .endswith (b "\n " ):
1435+ data = data + b "\n "
14451436
14461437 # Iterate over the matches.
14471438 line_number = 1
@@ -1451,14 +1442,12 @@ def parseIntegratedTestScriptCommands(source_path, keywords):
14511442 # newlines.
14521443 match_position = match .start ()
14531444 line_number += data .count (
1454- to_bytes ( "\n " ) , last_match_position , match_position
1445+ b "\n " , last_match_position , match_position
14551446 )
14561447 last_match_position = match_position
14571448
14581449 # Convert the keyword and line to UTF-8 strings and yield the
1459- # command. Note that we take care to return regular strings in
1460- # Python 2, to avoid other code having to differentiate between the
1461- # str and unicode types.
1450+ # command.
14621451 #
14631452 # Opening the file in binary mode prevented Windows \r newline
14641453 # characters from being converted to Unix \n newlines, so manually
0 commit comments