Skip to content

Commit b1be379

Browse files
authored
[mypyc] Detect more issues when generating annotated HTML (#18838)
Add new heuristics to detect various potential performance issues when using `-a foo.html`. The issues include various generic (non-specialized) operations, uses of `functools` and `itertools`, and slow `isinstance` checks that use runtime-checkable protocols. Implement a mypy AST visitor that is used to detect some issues that would be harder to detect when analyzing the generated IR. Support annotation priorities so that if multiple annotations are generated for a line, only the highest-priority ones are shown. This is a bit crude but useful, since often multiple heuristics are triggered by some inefficient code, and duplicate annotations would be verbose and sometimes confusing.
1 parent f629589 commit b1be379

File tree

6 files changed

+499
-29
lines changed

6 files changed

+499
-29
lines changed

mypyc/annotate.py

Lines changed: 226 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,90 @@
1+
"""Generate source code formatted as HTML, with bottlenecks annotated and highlighted.
2+
3+
Various heuristics are used to detect common issues that cause slower than
4+
expected performance.
5+
"""
6+
17
from __future__ import annotations
28

39
import os.path
410
import sys
511
from html import escape
12+
from typing import Final
613

714
from mypy.build import BuildResult
8-
from mypy.nodes import MypyFile
15+
from mypy.nodes import (
16+
CallExpr,
17+
Expression,
18+
ForStmt,
19+
FuncDef,
20+
LambdaExpr,
21+
MemberExpr,
22+
MypyFile,
23+
NameExpr,
24+
Node,
25+
RefExpr,
26+
TupleExpr,
27+
TypeInfo,
28+
Var,
29+
)
30+
from mypy.traverser import TraverserVisitor
31+
from mypy.types import AnyType, Instance, ProperType, Type, TypeOfAny, get_proper_type
932
from mypy.util import FancyFormatter
1033
from mypyc.ir.func_ir import FuncIR
1134
from mypyc.ir.module_ir import ModuleIR
12-
from mypyc.ir.ops import CallC, LoadLiteral, Value
35+
from mypyc.ir.ops import CallC, LoadLiteral, LoadStatic, Value
36+
37+
38+
class Annotation:
39+
"""HTML annotation for compiled source code"""
40+
41+
def __init__(self, message: str, priority: int = 1) -> None:
42+
# Message as HTML that describes an issue and/or how to fix it.
43+
# Multiple messages on a line may be concatenated.
44+
self.message = message
45+
# If multiple annotations are generated for a single line, only report
46+
# the highest-priority ones. Some use cases generate multiple annotations,
47+
# and this can be used to reduce verbosity by hiding the lower-priority
48+
# ones.
49+
self.priority = priority
50+
51+
52+
op_hints: Final = {
53+
"PyNumber_Add": Annotation('Generic "+" operation.'),
54+
"PyNumber_Subtract": Annotation('Generic "-" operation.'),
55+
"PyNumber_Multiply": Annotation('Generic "*" operation.'),
56+
"PyNumber_TrueDivide": Annotation('Generic "/" operation.'),
57+
"PyNumber_FloorDivide": Annotation('Generic "//" operation.'),
58+
"PyNumber_Positive": Annotation('Generic unary "+" operation.'),
59+
"PyNumber_Negative": Annotation('Generic unary "-" operation.'),
60+
"PyNumber_And": Annotation('Generic "&" operation.'),
61+
"PyNumber_Or": Annotation('Generic "|" operation.'),
62+
"PyNumber_Xor": Annotation('Generic "^" operation.'),
63+
"PyNumber_Lshift": Annotation('Generic "<<" operation.'),
64+
"PyNumber_Rshift": Annotation('Generic ">>" operation.'),
65+
"PyNumber_Invert": Annotation('Generic "~" operation.'),
66+
"PyObject_Call": Annotation("Generic call operation."),
67+
"PyObject_RichCompare": Annotation("Generic comparison operation."),
68+
"PyObject_GetItem": Annotation("Generic indexing operation."),
69+
"PyObject_SetItem": Annotation("Generic indexed assignment."),
70+
}
71+
72+
stdlib_hints: Final = {
73+
"functools.partial": Annotation(
74+
'"functools.partial" is inefficient in compiled code.', priority=2
75+
),
76+
"itertools.chain": Annotation(
77+
'"itertools.chain" is inefficient in compiled code (hint: replace with for loops).',
78+
priority=2,
79+
),
80+
"itertools.groupby": Annotation(
81+
'"itertools.groupby" is inefficient in compiled code.', priority=2
82+
),
83+
"itertools.islice": Annotation(
84+
'"itertools.islice" is inefficient in compiled code (hint: replace with for loop over index range).',
85+
priority=2,
86+
),
87+
}
1388

1489
CSS = """\
1590
.collapsible {
@@ -44,7 +119,9 @@
44119

45120

46121
class AnnotatedSource:
47-
def __init__(self, path: str, annotations: dict[int, list[str]]) -> None:
122+
"""Annotations for a single compiled source file."""
123+
124+
def __init__(self, path: str, annotations: dict[int, list[Annotation]]) -> None:
48125
self.path = path
49126
self.annotations = annotations
50127

@@ -57,7 +134,7 @@ def generate_annotated_html(
57134
path = result.graph[mod].path
58135
tree = result.graph[mod].tree
59136
assert tree is not None
60-
annotations.append(generate_annotations(path or "<source>", tree, mod_ir))
137+
annotations.append(generate_annotations(path or "<source>", tree, mod_ir, result.types))
61138
html = generate_html_report(annotations)
62139
with open(html_fnam, "w") as f:
63140
f.write(html)
@@ -67,40 +144,172 @@ def generate_annotated_html(
67144
print(f"\nWrote {formatted} -- open in browser to view\n")
68145

69146

70-
def generate_annotations(path: str, tree: MypyFile, ir: ModuleIR) -> AnnotatedSource:
147+
def generate_annotations(
148+
path: str, tree: MypyFile, ir: ModuleIR, type_map: dict[Expression, Type]
149+
) -> AnnotatedSource:
71150
anns = {}
72151
for func_ir in ir.functions:
73-
anns.update(function_annotations(func_ir))
152+
anns.update(function_annotations(func_ir, tree))
153+
visitor = ASTAnnotateVisitor(type_map)
154+
for defn in tree.defs:
155+
defn.accept(visitor)
156+
anns.update(visitor.anns)
74157
return AnnotatedSource(path, anns)
75158

76159

77-
def function_annotations(func_ir: FuncIR) -> dict[int, list[str]]:
160+
def function_annotations(func_ir: FuncIR, tree: MypyFile) -> dict[int, list[Annotation]]:
161+
"""Generate annotations based on mypyc IR."""
78162
# TODO: check if func_ir.line is -1
79-
anns: dict[int, list[str]] = {}
163+
anns: dict[int, list[Annotation]] = {}
80164
for block in func_ir.blocks:
81165
for op in block.ops:
82166
if isinstance(op, CallC):
83167
name = op.function_name
84-
ann = None
168+
ann: str | Annotation | None = None
85169
if name == "CPyObject_GetAttr":
86170
attr_name = get_str_literal(op.args[1])
87-
if attr_name:
171+
if attr_name == "__prepare__":
172+
# These attributes are internal to mypyc/CPython, and the user has
173+
# little control over them.
174+
ann = None
175+
elif attr_name:
88176
ann = f'Get non-native attribute "{attr_name}".'
89177
else:
90178
ann = "Dynamic attribute lookup."
91-
elif name == "PyNumber_Add":
92-
ann = 'Generic "+" operation.'
179+
elif name == "PyObject_VectorcallMethod":
180+
method_name = get_str_literal(op.args[0])
181+
if method_name:
182+
ann = f'Call non-native method "{method_name}".'
183+
else:
184+
ann = "Dynamic method call."
185+
elif name in op_hints:
186+
ann = op_hints[name]
187+
elif name in ("CPyDict_GetItem", "CPyDict_SetItem"):
188+
if (
189+
isinstance(op.args[0], LoadStatic)
190+
and isinstance(op.args[1], LoadLiteral)
191+
and func_ir.name != "__top_level__"
192+
):
193+
load = op.args[0]
194+
name = str(op.args[1].value)
195+
sym = tree.names.get(name)
196+
if (
197+
sym
198+
and sym.node
199+
and load.namespace == "static"
200+
and load.identifier == "globals"
201+
):
202+
if sym.node.fullname in stdlib_hints:
203+
ann = stdlib_hints[sym.node.fullname]
204+
elif isinstance(sym.node, Var):
205+
ann = (
206+
f'Access global "{name}" through namespace '
207+
+ "dictionary (hint: access is faster if you can make it Final)."
208+
)
209+
else:
210+
ann = f'Access "{name}" through global namespace dictionary.'
93211
if ann:
212+
if isinstance(ann, str):
213+
ann = Annotation(ann)
94214
anns.setdefault(op.line, []).append(ann)
95215
return anns
96216

97217

218+
class ASTAnnotateVisitor(TraverserVisitor):
219+
"""Generate annotations from mypy AST and inferred types."""
220+
221+
def __init__(self, type_map: dict[Expression, Type]) -> None:
222+
self.anns: dict[int, list[Annotation]] = {}
223+
self.func_depth = 0
224+
self.type_map = type_map
225+
226+
def visit_func_def(self, o: FuncDef, /) -> None:
227+
if self.func_depth > 0:
228+
self.annotate(
229+
o,
230+
"A nested function object is allocated each time statement is executed. "
231+
+ "A module-level function would be faster.",
232+
)
233+
self.func_depth += 1
234+
super().visit_func_def(o)
235+
self.func_depth -= 1
236+
237+
def visit_for_stmt(self, o: ForStmt, /) -> None:
238+
typ = self.get_type(o.expr)
239+
if isinstance(typ, AnyType):
240+
self.annotate(o.expr, 'For loop uses generic operations (iterable has type "Any").')
241+
elif isinstance(typ, Instance) and typ.type.fullname in (
242+
"typing.Iterable",
243+
"typing.Iterator",
244+
"typing.Sequence",
245+
"typing.MutableSequence",
246+
):
247+
self.annotate(
248+
o.expr,
249+
f'For loop uses generic operations (iterable has the abstract type "{typ.type.fullname}").',
250+
)
251+
super().visit_for_stmt(o)
252+
253+
def visit_name_expr(self, o: NameExpr, /) -> None:
254+
if ann := stdlib_hints.get(o.fullname):
255+
self.annotate(o, ann)
256+
257+
def visit_member_expr(self, o: MemberExpr, /) -> None:
258+
super().visit_member_expr(o)
259+
if ann := stdlib_hints.get(o.fullname):
260+
self.annotate(o, ann)
261+
262+
def visit_call_expr(self, o: CallExpr, /) -> None:
263+
super().visit_call_expr(o)
264+
if (
265+
isinstance(o.callee, RefExpr)
266+
and o.callee.fullname == "builtins.isinstance"
267+
and len(o.args) == 2
268+
):
269+
arg = o.args[1]
270+
self.check_isinstance_arg(arg)
271+
272+
def check_isinstance_arg(self, arg: Expression) -> None:
273+
if isinstance(arg, RefExpr):
274+
if isinstance(arg.node, TypeInfo) and arg.node.is_protocol:
275+
self.annotate(
276+
arg, f'Expensive isinstance() check against protocol "{arg.node.name}".'
277+
)
278+
elif isinstance(arg, TupleExpr):
279+
for item in arg.items:
280+
self.check_isinstance_arg(item)
281+
282+
def visit_lambda_expr(self, o: LambdaExpr, /) -> None:
283+
self.annotate(
284+
o,
285+
"A new object is allocated for lambda each time it is evaluated. "
286+
+ "A module-level function would be faster.",
287+
)
288+
super().visit_lambda_expr(o)
289+
290+
def annotate(self, o: Node, ann: str | Annotation) -> None:
291+
if isinstance(ann, str):
292+
ann = Annotation(ann)
293+
self.anns.setdefault(o.line, []).append(ann)
294+
295+
def get_type(self, e: Expression) -> ProperType:
296+
t = self.type_map.get(e)
297+
if t:
298+
return get_proper_type(t)
299+
return AnyType(TypeOfAny.unannotated)
300+
301+
98302
def get_str_literal(v: Value) -> str | None:
99303
if isinstance(v, LoadLiteral) and isinstance(v.value, str):
100304
return v.value
101305
return None
102306

103307

308+
def get_max_prio(anns: list[Annotation]) -> list[Annotation]:
309+
max_prio = max(a.priority for a in anns)
310+
return [a for a in anns if a.priority == max_prio]
311+
312+
104313
def generate_html_report(sources: list[AnnotatedSource]) -> str:
105314
html = []
106315
html.append("<html>\n<head>\n")
@@ -110,15 +319,17 @@ def generate_html_report(sources: list[AnnotatedSource]) -> str:
110319
for src in sources:
111320
html.append(f"<h2><tt>{src.path}</tt></h2>\n")
112321
html.append("<pre>")
113-
anns = src.annotations
322+
src_anns = src.annotations
114323
with open(src.path) as f:
115324
lines = f.readlines()
116325
for i, s in enumerate(lines):
117326
s = escape(s)
118327
line = i + 1
119328
linenum = "%5d" % line
120-
if line in anns:
121-
hint = " ".join(anns[line])
329+
if line in src_anns:
330+
anns = get_max_prio(src_anns[line])
331+
ann_strs = [a.message for a in anns]
332+
hint = " ".join(ann_strs)
122333
s = colorize_line(linenum, s, hint_html=hint)
123334
else:
124335
s = linenum + " " + s

mypyc/irbuild/ll_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1161,7 +1161,7 @@ def gen_method_call(
11611161
"""Generate either a native or Python method call."""
11621162
# If we have *args, then fallback to Python method call.
11631163
if arg_kinds is not None and any(kind.is_star() for kind in arg_kinds):
1164-
return self.py_method_call(base, name, arg_values, base.line, arg_kinds, arg_names)
1164+
return self.py_method_call(base, name, arg_values, line, arg_kinds, arg_names)
11651165

11661166
# If the base type is one of ours, do a MethodCall
11671167
if (

0 commit comments

Comments
 (0)