|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | 3 | import copy |
| 4 | +import io |
4 | 5 | import re |
5 | 6 | import sys |
| 7 | +import tokenize |
6 | 8 | import warnings |
7 | 9 | from typing import Any, Callable, Final, List, Optional, Sequence, TypeVar, Union, cast |
8 | 10 | from typing_extensions import Literal, overload |
|
# Matches the `# mypy: ignore` marker inside a comment.  The negative lookahead
# rejects `ignore` followed by `-` or `_`, so inline-configuration comments such
# as `# mypy: ignore-errors` are never rewritten.
_MYPY_IGNORE_RE = re.compile(r"#\s*mypy:\s*ignore(?![-_])")


def _rewrite_mypy_ignore_comments(text: str) -> str:
    """Return *text* with `# mypy: ignore` replaced by `# type: ignore` in comments.

    Only COMMENT tokens (as reported by `tokenize`) are touched, so string
    literals that merely contain the marker are left alone.  Each replacement is
    spliced into its original line at the token's own column, which keeps every
    other character -- and therefore every line/column offset outside the
    comment -- exactly as it was.

    The caller is expected to pass source that already parses: `tokenize` only
    has defined behaviour for syntactically valid code.  If tokenizing fails
    anyway, *text* is returned unchanged.
    """
    # StringIO.readlines() splits exactly like the readline callable handed to
    # tokenize below, so token row numbers index straight into this list.
    lines = io.StringIO(text).readlines()
    changed = False
    try:
        for tok in tokenize.generate_tokens(io.StringIO(text).readline):
            if tok.type != tokenize.COMMENT:
                continue
            replacement = _MYPY_IGNORE_RE.sub("# type: ignore", tok.string)
            if replacement == tok.string:
                continue
            row, col = tok.start
            line = lines[row - 1]
            end = col + len(tok.string)
            # Splice only if the token really sits where tokenize says it does;
            # otherwise skip it rather than risk corrupting the source.
            if line[col:end] != tok.string:
                continue
            lines[row - 1] = line[:col] + replacement + line[end:]
            changed = True
    except (tokenize.TokenError, SyntaxError):
        # Best effort: leave the source untouched instead of crashing the parse.
        return text
    return "".join(lines) if changed else text


def ast3_parse(
    source: str | bytes, filename: str, mode: str, feature_version: int = PY_MINOR_VERSION
) -> AST:
    """Parse *source* with `ast3.parse`, using the flags mypy needs.

    This is a convenience wrapper that always enables `type_comments`.  It also
    contains a hack to accommodate `# mypy: ignore` comments: they are rewritten
    to `# type: ignore` before parsing so that the parser records them as type
    ignores.  (This is unrelated to mypy's inline configuration comments; see
    https://mypy.readthedocs.io/en/stable/inline_config.html and
    util.get_mypy_comments in this codebase.)

    Args:
        source: Program text, either as `str` or as encoded `bytes`.
        filename: File name passed through to `ast3.parse`.
        mode: Parse mode passed through to `ast3.parse`.
        feature_version: Minor Python version passed through to `ast3.parse`.

    Returns:
        The parsed AST.

    Raises:
        Whatever `ast3.parse` raises for the unmodified *source* (for example
        `SyntaxError`).
    """

    def parse(src: str | bytes) -> AST:
        return ast3.parse(
            src, filename, mode, type_comments=True, feature_version=feature_version
        )

    # Fast path: the rewrite can only apply if the literal text "mypy:" occurs
    # somewhere, so most files need a single parse and no tokenizing at all.
    # (For bytes this assumes an ASCII-compatible source encoding.)
    if isinstance(source, str):
        may_have_marker = "mypy:" in source
    else:
        may_have_marker = b"mypy:" in source
    if not may_have_marker:
        return parse(source)

    # The tokenize documentation warns that its behaviour is undefined for code
    # that ast.parse would reject, so establish validity first.  This raises
    # (and leaves the function) on invalid input, reporting the error against
    # the unmodified source.
    tree = parse(source)

    rewritten: str | bytes
    if isinstance(source, str):
        rewritten = _rewrite_mypy_ignore_comments(source)
    else:
        # Token strings are always `str`, so decode using the encoding the
        # tokenizer itself would pick (BOM / coding cookie) and re-encode after.
        encoding, _ = tokenize.detect_encoding(io.BytesIO(source).readline)
        text = source.decode(encoding)
        new_text = _rewrite_mypy_ignore_comments(text)
        rewritten = source if new_text == text else new_text.encode(encoding)

    if rewritten == source:
        # Nothing was rewritten; the tree from the validity parse is the answer.
        return tree
    return parse(rewritten)
154 | 163 |
|
155 | 164 |
|
156 | 165 | NamedExpr = ast3.NamedExpr |
|
0 commit comments