Skip to content

Commit 568b3ff

Browse files
committed
Fix handling of docstrings with tokenization errors
1 parent e2b821b commit 568b3ff

File tree

2 files changed

+69
-8
lines changed

2 files changed

+69
-8
lines changed

mypy/stubdoc.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -347,14 +347,22 @@ def infer_sig_from_docstring(docstr: str | None, name: str) -> list[FunctionSig]
347347
return None
348348

349349
state = DocStringParser(name)
350-
# Return all found signatures, even if there is a parse error after some are found.
351-
with contextlib.suppress(tokenize.TokenError):
352-
try:
353-
tokens = tokenize.tokenize(io.BytesIO(docstr.encode("utf-8")).readline)
354-
for token in tokens:
355-
state.add_token(token)
356-
except IndentationError:
357-
return None
350+
351+
# Keep tokenizing after an error. If `TokenError` is encountered, tokenize() will
352+
# stop. We check the remaining bytes in bytes_io and resume tokenizing on the next
353+
# loop iteration.
354+
encoded_docstr = docstr.encode("utf-8")
355+
bytes_io = io.BytesIO(encoded_docstr)
356+
while bytes_io.tell() < len(encoded_docstr):
357+
# Return all found signatures, even if there is a parse error after some are found.
358+
with contextlib.suppress(tokenize.TokenError):
359+
try:
360+
tokens = tokenize.tokenize(bytes_io.readline)
361+
for token in tokens:
362+
state.add_token(token)
363+
except IndentationError:
364+
return None
365+
358366
sigs = state.get_signatures()
359367

360368
def is_unique_args(sig: FunctionSig) -> bool:

mypy/test/teststubgen.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,59 @@ def test_infer_sig_from_docstring_square_brackets(self) -> None:
429429
== []
430430
)
431431

432+
def test_infer_sig_from_docstring_unterminated_string_literal(self) -> None:
433+
docstring = """
434+
func(*args, **kwargs)
435+
Overloaded function.
436+
437+
1. func(x: int) -> None
438+
439+
This is a valid docstring with an "unterminated string literal.
440+
441+
2. func(x: int, y: int) -> str
442+
443+
This is an overloaded method.
444+
"""
445+
sigs = infer_sig_from_docstring(docstring, name="func")
446+
assert_equal(
447+
sigs[0], FunctionSig(name="func", args=[ArgSig(name="x", type="int")], ret_type="None")
448+
)
449+
assert_equal(
450+
sigs[1],
451+
FunctionSig(
452+
name="func",
453+
args=[ArgSig(name="x", type="int"), ArgSig(name="y", type="int")],
454+
ret_type="str",
455+
),
456+
)
457+
458+
def test_infer_sig_from_docstring_latex(self) -> None:
459+
docstring = """
460+
func(*args, **kwargs)
461+
Overloaded function.
462+
463+
1. func(x: int) -> None
464+
465+
.. math::
466+
\\mathbf{f}\\left(x\\right) = \\pi \\cdot x
467+
468+
2. func(x: int, y: int) -> str
469+
470+
This is an overloaded method.
471+
"""
472+
sigs = infer_sig_from_docstring(docstring, name="func")
473+
assert_equal(
474+
sigs[0], FunctionSig(name="func", args=[ArgSig(name="x", type="int")], ret_type="None")
475+
)
476+
assert_equal(
477+
sigs[1],
478+
FunctionSig(
479+
name="func",
480+
args=[ArgSig(name="x", type="int"), ArgSig(name="y", type="int")],
481+
ret_type="str",
482+
),
483+
)
484+
432485
def test_remove_misplaced_type_comments_1(self) -> None:
433486
good = """
434487
\u1234

0 commit comments

Comments
 (0)