import ast
import io
import operator
import os
import sys
import token
import tokenize


class Visitor(ast.NodeVisitor):
    def __init__(self, lines):
        self._lines = lines
        self.line_numbers_with_nodes = set()
        self.line_numbers_with_statements = []

    def generic_visit(self, node):
        # A node starting at column 0 is module-level code; record its line,
        # and additionally track it if it is a statement.
        if hasattr(node, 'col_offset') and hasattr(node, 'lineno') and node.col_offset == 0:
            self.line_numbers_with_nodes.add(node.lineno)
            if isinstance(node, ast.stmt):
                self.line_numbers_with_statements.append(node.lineno)

        ast.NodeVisitor.generic_visit(self, node)
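
# Illustrative sketch (not part of the original module): for the source
# 'x = 1\nif x:\n    y = 2\n', visiting the parsed tree records top-level
# statement lines [1, 2]; the nested assignment on line 3 starts at a
# non-zero column and is therefore skipped.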

def _tokenize(source):
    """Tokenize Python source code."""
    # Using an undocumented API, as the documented one in Python 2.7 does not
    # work as needed cross-version.
    return tokenize.generate_tokens(io.StringIO(source).readline)
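
# A quick sketch of the resulting token stream: blank lines show up as NL
# tokens, which is what normalize_lines() filters on below.
#
#     >>> [token.tok_name[t[0]] for t in _tokenize('a = 1\n\nb = 2\n')]
#     ['NAME', 'OP', 'NUMBER', 'NEWLINE', 'NL', 'NAME', 'OP', 'NUMBER', 'NEWLINE', 'ENDMARKER']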

def _indent_size(line):
    # Return the index of the first non-whitespace character, i.e. the line's
    # indent size; an all-whitespace line implicitly returns None.
    for index, char in enumerate(line):
        if not char.isspace():
            return index
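
# For illustration: _indent_size('    pass') == 4, _indent_size('pass') == 0,
# and _indent_size('   ') is None, so whitespace-only lines never count as
# being at indent level 0.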

def _get_global_statement_blocks(source, lines):
    """Return a list of all global statement blocks.

    The list comprises 3-item tuples containing the starting line number,
    the ending line number, and whether the statement is a single line.

    """
    tree = ast.parse(source)
    visitor = Visitor(lines)
    visitor.visit(tree)

    statement_ranges = []
    for index, line_number in enumerate(visitor.line_numbers_with_statements):
        remaining_line_numbers = visitor.line_numbers_with_statements[index + 1:]
        end_line_number = len(lines) if len(remaining_line_numbers) == 0 else min(remaining_line_numbers) - 1
        current_statement_is_oneline = line_number == end_line_number

        if len(statement_ranges) == 0:
            statement_ranges.append((line_number, end_line_number, current_statement_is_oneline))
            continue

        previous_statement = statement_ranges[-1]
        previous_statement_is_oneline = previous_statement[2]
        if previous_statement_is_oneline and current_statement_is_oneline:
            # Merge runs of consecutive single-line statements into one block.
            statement_ranges[-1] = previous_statement[0], end_line_number, True
        else:
            statement_ranges.append((line_number, end_line_number, current_statement_is_oneline))

    return statement_ranges
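
# Worked example (illustrative): for the four-line source
# 'a = 1\nb = 2\ndef f():\n    pass\n', the top-level statements are on
# lines 1, 2 and 3, and the result is [(1, 2, True), (3, 4, False)]: the two
# single-line assignments merge into one block, while the function definition
# spans lines 3-4 and stays its own multi-line block.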

def normalize_lines(source):
    """Normalize blank lines for sending to the terminal.

    Blank lines within a statement block are removed to prevent the REPL
    from thinking the block is finished. Newlines are added to separate
    top-level statements so that the REPL does not think there is a syntax
    error.

    """
    lines = source.splitlines(False)
    # Find out if we have any trailing blank lines.
    has_blank_lines = len(lines[-1].strip()) == 0 or source.endswith(os.linesep)

    # Step 1: Remove empty lines.
    tokens = _tokenize(source)
    newlines_indexes_to_remove = (spos[0] for (toknum, tokval, spos, epos, line) in tokens
                                  if len(line.strip()) == 0 and token.tok_name[toknum] == 'NL' and spos[0] == epos[0])

    # Delete from the bottom up so earlier indexes stay valid.
    for line_number in reversed(list(newlines_indexes_to_remove)):
        del lines[line_number - 1]

    # Step 2: Add blank lines between each global statement block.
    # Consecutive single-line blocks of code are treated as a single statement
    # so that we do not unnecessarily add too many blank lines.
    source = os.linesep.join(lines)
    tokens = _tokenize(source)
    # Note: this generator is built lazily and is never consumed below, so it
    # currently has no effect.
    dedent_indexes = (spos[0] for (toknum, tokval, spos, epos, line) in tokens
                      if toknum == token.DEDENT and _indent_size(line) == 0)

    global_statement_ranges = _get_global_statement_blocks(source, lines)

    # Insert a blank line before every block that does not start on line 1,
    # walking the blocks bottom-up so the line numbers remain valid.
    for line_number in filter(lambda x: x > 1, map(operator.itemgetter(0), reversed(global_statement_ranges))):
        lines.insert(line_number - 1, '')

    sys.stdout.write(os.linesep.join(lines) + (os.linesep if has_blank_lines else ''))
    sys.stdout.flush()
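
# End-to-end sketch (illustrative input/output, assuming '\n' as os.linesep):
# normalize_lines('def f():\n\n    x = 1\nx = 2') writes
# 'def f():\n    x = 1\n\nx = 2' to stdout: the blank line inside the
# function body is removed, and a blank line is inserted before the
# following top-level statement.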


if __name__ == '__main__':
    contents = sys.argv[1]
    try:
        # Round-trip the argument through the default encoding so that any
        # lone surrogates are replaced rather than failing later on output.
        default_encoding = sys.getdefaultencoding()
        contents = contents.encode(default_encoding, 'surrogateescape').decode(default_encoding, 'replace')
    except (UnicodeError, LookupError):
        pass
    if isinstance(contents, bytes):
        # Under Python 2 the argument arrives as bytes; decode it explicitly.
        contents = contents.decode('utf8')
    normalize_lines(contents)