Skip to content

Commit 573125e

Browse files
use python's tokenize, in order to limit the replacement to only comments
note also that the 'not start of line' constraint had to be removed from the regex, because now each comment is encountered individually and thus they are at the start
1 parent d697114 commit 573125e

File tree

1 file changed

+19
-10
lines changed

1 file changed

+19
-10
lines changed

mypy/fastparse.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from __future__ import annotations
22

33
import copy
4+
import io
45
import re
56
import sys
7+
import tokenize
68
import warnings
79
from typing import Any, Callable, Final, List, Optional, Sequence, TypeVar, Union, cast
810
from typing_extensions import Literal, overload
@@ -138,19 +140,26 @@
def ast3_parse(
    source: str | bytes, filename: str, mode: str, feature_version: int = PY_MINOR_VERSION
) -> AST:
    """Convenience wrapper around ast.parse with the default flags useful to mypy.

    It also incorporates a hack to accommodate `# mypy: ignore` comments, which are
    treated by mypy as `# type: ignore` comments.
    (Note: completely distinct from https://mypy.readthedocs.io/en/stable/inline_config.html ;
    see also, util.get_mypy_comments in this codebase.)
    """
    # Hack to support "mypy: ignore" comments until the builtin compile function
    # changes to allow us to detect it otherwise. We make the substitution in
    # comments only, and to find those comments we use Python's `tokenize`.
    #
    # https://docs.python.org/3/library/tokenize.html has a big red **Warning:**
    # the functions in that module are only designed to parse syntactically valid
    # Python code; their behavior is *undefined* on invalid input. So we cannot
    # rely on tokenize's roundtrip behavior iff ast.parse would throw on `source`.
    # The simplest way to deal with that is to call ast.parse twice, once before
    # and once after the rewrite.

    def parse() -> AST:
        # Closes over `source`, so the second call below sees the rewritten text.
        return ast3.parse(
            source, filename, mode, type_comments=True, feature_version=feature_version
        )

    # First call assures syntactic validity (throws otherwise, exiting this function).
    parse()

    # NB: a str (not bytes) pattern in BOTH branches. tokenize.tokenize decodes
    # bytes input and yields str token strings, so a bytes regex would raise
    # TypeError on any comment token; tokenize.untokenize re-encodes the result
    # back to bytes via the leading ENCODING token.
    to_find, to_replace = r"#\s*mypy:\s*ignore(?![-_])", "# type: ignore"
    if isinstance(source, str):
        tokens = tokenize.generate_tokens(io.StringIO(source).readline)
    else:
        tokens = tokenize.tokenize(io.BytesIO(source).readline)
    source = tokenize.untokenize(
        (tok_type, re.sub(to_find, to_replace, tok_str) if tok_type == tokenize.COMMENT else tok_str)
        for tok_type, tok_str, *_ in tokens
    )
    return parse()
154163

155164

156165
NamedExpr = ast3.NamedExpr

0 commit comments

Comments
 (0)