1+ """Generate source code formatted as HTML, with bottlenecks annotated and highlighted.
2+
3+ Various heuristics are used to detect common issues that cause slower than
4+ expected performance.
5+ """
6+
17from __future__ import annotations
28
39import os .path
410import sys
511from html import escape
12+ from typing import Final
613
714from mypy .build import BuildResult
8- from mypy .nodes import MypyFile
15+ from mypy .nodes import (
16+ CallExpr ,
17+ Expression ,
18+ ForStmt ,
19+ FuncDef ,
20+ LambdaExpr ,
21+ MemberExpr ,
22+ MypyFile ,
23+ NameExpr ,
24+ Node ,
25+ RefExpr ,
26+ TupleExpr ,
27+ TypeInfo ,
28+ Var ,
29+ )
30+ from mypy .traverser import TraverserVisitor
31+ from mypy .types import AnyType , Instance , ProperType , Type , TypeOfAny , get_proper_type
932from mypy .util import FancyFormatter
1033from mypyc .ir .func_ir import FuncIR
1134from mypyc .ir .module_ir import ModuleIR
12- from mypyc .ir .ops import CallC , LoadLiteral , Value
35+ from mypyc .ir .ops import CallC , LoadLiteral , LoadStatic , Value
36+
37+
38+ class Annotation :
39+ """HTML annotation for compiled source code"""
40+
41+ def __init__ (self , message : str , priority : int = 1 ) -> None :
42+ # Message as HTML that describes an issue and/or how to fix it.
43+ # Multiple messages on a line may be concatenated.
44+ self .message = message
45+ # If multiple annotations are generated for a single line, only report
46+ # the highest-priority ones. Some use cases generate multiple annotations,
47+ # and this can be used to reduce verbosity by hiding the lower-priority
48+ # ones.
49+ self .priority = priority
50+
51+
52+ op_hints : Final = {
53+ "PyNumber_Add" : Annotation ('Generic "+" operation.' ),
54+ "PyNumber_Subtract" : Annotation ('Generic "-" operation.' ),
55+ "PyNumber_Multiply" : Annotation ('Generic "*" operation.' ),
56+ "PyNumber_TrueDivide" : Annotation ('Generic "/" operation.' ),
57+ "PyNumber_FloorDivide" : Annotation ('Generic "//" operation.' ),
58+ "PyNumber_Positive" : Annotation ('Generic unary "+" operation.' ),
59+ "PyNumber_Negative" : Annotation ('Generic unary "-" operation.' ),
60+ "PyNumber_And" : Annotation ('Generic "&" operation.' ),
61+ "PyNumber_Or" : Annotation ('Generic "|" operation.' ),
62+ "PyNumber_Xor" : Annotation ('Generic "^" operation.' ),
63+ "PyNumber_Lshift" : Annotation ('Generic "<<" operation.' ),
64+ "PyNumber_Rshift" : Annotation ('Generic ">>" operation.' ),
65+ "PyNumber_Invert" : Annotation ('Generic "~" operation.' ),
66+ "PyObject_Call" : Annotation ("Generic call operation." ),
67+ "PyObject_RichCompare" : Annotation ("Generic comparison operation." ),
68+ "PyObject_GetItem" : Annotation ("Generic indexing operation." ),
69+ "PyObject_SetItem" : Annotation ("Generic indexed assignment." ),
70+ }
71+
72+ stdlib_hints : Final = {
73+ "functools.partial" : Annotation (
74+ '"functools.partial" is inefficient in compiled code.' , priority = 2
75+ ),
76+ "itertools.chain" : Annotation (
77+ '"itertools.chain" is inefficient in compiled code (hint: replace with for loops).' ,
78+ priority = 2 ,
79+ ),
80+ "itertools.groupby" : Annotation (
81+ '"itertools.groupby" is inefficient in compiled code.' , priority = 2
82+ ),
83+ "itertools.islice" : Annotation (
84+ '"itertools.islice" is inefficient in compiled code (hint: replace with for loop over index range).' ,
85+ priority = 2 ,
86+ ),
87+ }
1388
1489CSS = """\
1590 .collapsible {
44119
45120
46121class AnnotatedSource :
47- def __init__ (self , path : str , annotations : dict [int , list [str ]]) -> None :
122+ """Annotations for a single compiled source file."""
123+
124+ def __init__ (self , path : str , annotations : dict [int , list [Annotation ]]) -> None :
48125 self .path = path
49126 self .annotations = annotations
50127
@@ -57,7 +134,7 @@ def generate_annotated_html(
57134 path = result .graph [mod ].path
58135 tree = result .graph [mod ].tree
59136 assert tree is not None
60- annotations .append (generate_annotations (path or "<source>" , tree , mod_ir ))
137+ annotations .append (generate_annotations (path or "<source>" , tree , mod_ir , result . types ))
61138 html = generate_html_report (annotations )
62139 with open (html_fnam , "w" ) as f :
63140 f .write (html )
@@ -67,40 +144,172 @@ def generate_annotated_html(
67144 print (f"\n Wrote { formatted } -- open in browser to view\n " )
68145
69146
70- def generate_annotations (path : str , tree : MypyFile , ir : ModuleIR ) -> AnnotatedSource :
147+ def generate_annotations (
148+ path : str , tree : MypyFile , ir : ModuleIR , type_map : dict [Expression , Type ]
149+ ) -> AnnotatedSource :
71150 anns = {}
72151 for func_ir in ir .functions :
73- anns .update (function_annotations (func_ir ))
152+ anns .update (function_annotations (func_ir , tree ))
153+ visitor = ASTAnnotateVisitor (type_map )
154+ for defn in tree .defs :
155+ defn .accept (visitor )
156+ anns .update (visitor .anns )
74157 return AnnotatedSource (path , anns )
75158
76159
77- def function_annotations (func_ir : FuncIR ) -> dict [int , list [str ]]:
160+ def function_annotations (func_ir : FuncIR , tree : MypyFile ) -> dict [int , list [Annotation ]]:
161+ """Generate annotations based on mypyc IR."""
78162 # TODO: check if func_ir.line is -1
79- anns : dict [int , list [str ]] = {}
163+ anns : dict [int , list [Annotation ]] = {}
80164 for block in func_ir .blocks :
81165 for op in block .ops :
82166 if isinstance (op , CallC ):
83167 name = op .function_name
84- ann = None
168+ ann : str | Annotation | None = None
85169 if name == "CPyObject_GetAttr" :
86170 attr_name = get_str_literal (op .args [1 ])
87- if attr_name :
171+ if attr_name == "__prepare__" :
172+ # These attributes are internal to mypyc/CPython, and the user has
173+ # little control over them.
174+ ann = None
175+ elif attr_name :
88176 ann = f'Get non-native attribute "{ attr_name } ".'
89177 else :
90178 ann = "Dynamic attribute lookup."
91- elif name == "PyNumber_Add" :
92- ann = 'Generic "+" operation.'
179+ elif name == "PyObject_VectorcallMethod" :
180+ method_name = get_str_literal (op .args [0 ])
181+ if method_name :
182+ ann = f'Call non-native method "{ method_name } ".'
183+ else :
184+ ann = "Dynamic method call."
185+ elif name in op_hints :
186+ ann = op_hints [name ]
187+ elif name in ("CPyDict_GetItem" , "CPyDict_SetItem" ):
188+ if (
189+ isinstance (op .args [0 ], LoadStatic )
190+ and isinstance (op .args [1 ], LoadLiteral )
191+ and func_ir .name != "__top_level__"
192+ ):
193+ load = op .args [0 ]
194+ name = str (op .args [1 ].value )
195+ sym = tree .names .get (name )
196+ if (
197+ sym
198+ and sym .node
199+ and load .namespace == "static"
200+ and load .identifier == "globals"
201+ ):
202+ if sym .node .fullname in stdlib_hints :
203+ ann = stdlib_hints [sym .node .fullname ]
204+ elif isinstance (sym .node , Var ):
205+ ann = (
206+ f'Access global "{ name } " through namespace '
207+ + "dictionary (hint: access is faster if you can make it Final)."
208+ )
209+ else :
210+ ann = f'Access "{ name } " through global namespace dictionary.'
93211 if ann :
212+ if isinstance (ann , str ):
213+ ann = Annotation (ann )
94214 anns .setdefault (op .line , []).append (ann )
95215 return anns
96216
97217
218+ class ASTAnnotateVisitor (TraverserVisitor ):
219+ """Generate annotations from mypy AST and inferred types."""
220+
221+ def __init__ (self , type_map : dict [Expression , Type ]) -> None :
222+ self .anns : dict [int , list [Annotation ]] = {}
223+ self .func_depth = 0
224+ self .type_map = type_map
225+
226+ def visit_func_def (self , o : FuncDef , / ) -> None :
227+ if self .func_depth > 0 :
228+ self .annotate (
229+ o ,
230+ "A nested function object is allocated each time statement is executed. "
231+ + "A module-level function would be faster." ,
232+ )
233+ self .func_depth += 1
234+ super ().visit_func_def (o )
235+ self .func_depth -= 1
236+
237+ def visit_for_stmt (self , o : ForStmt , / ) -> None :
238+ typ = self .get_type (o .expr )
239+ if isinstance (typ , AnyType ):
240+ self .annotate (o .expr , 'For loop uses generic operations (iterable has type "Any").' )
241+ elif isinstance (typ , Instance ) and typ .type .fullname in (
242+ "typing.Iterable" ,
243+ "typing.Iterator" ,
244+ "typing.Sequence" ,
245+ "typing.MutableSequence" ,
246+ ):
247+ self .annotate (
248+ o .expr ,
249+ f'For loop uses generic operations (iterable has the abstract type "{ typ .type .fullname } ").' ,
250+ )
251+ super ().visit_for_stmt (o )
252+
253+ def visit_name_expr (self , o : NameExpr , / ) -> None :
254+ if ann := stdlib_hints .get (o .fullname ):
255+ self .annotate (o , ann )
256+
257+ def visit_member_expr (self , o : MemberExpr , / ) -> None :
258+ super ().visit_member_expr (o )
259+ if ann := stdlib_hints .get (o .fullname ):
260+ self .annotate (o , ann )
261+
262+ def visit_call_expr (self , o : CallExpr , / ) -> None :
263+ super ().visit_call_expr (o )
264+ if (
265+ isinstance (o .callee , RefExpr )
266+ and o .callee .fullname == "builtins.isinstance"
267+ and len (o .args ) == 2
268+ ):
269+ arg = o .args [1 ]
270+ self .check_isinstance_arg (arg )
271+
272+ def check_isinstance_arg (self , arg : Expression ) -> None :
273+ if isinstance (arg , RefExpr ):
274+ if isinstance (arg .node , TypeInfo ) and arg .node .is_protocol :
275+ self .annotate (
276+ arg , f'Expensive isinstance() check against protocol "{ arg .node .name } ".'
277+ )
278+ elif isinstance (arg , TupleExpr ):
279+ for item in arg .items :
280+ self .check_isinstance_arg (item )
281+
282+ def visit_lambda_expr (self , o : LambdaExpr , / ) -> None :
283+ self .annotate (
284+ o ,
285+ "A new object is allocated for lambda each time it is evaluated. "
286+ + "A module-level function would be faster." ,
287+ )
288+ super ().visit_lambda_expr (o )
289+
290+ def annotate (self , o : Node , ann : str | Annotation ) -> None :
291+ if isinstance (ann , str ):
292+ ann = Annotation (ann )
293+ self .anns .setdefault (o .line , []).append (ann )
294+
295+ def get_type (self , e : Expression ) -> ProperType :
296+ t = self .type_map .get (e )
297+ if t :
298+ return get_proper_type (t )
299+ return AnyType (TypeOfAny .unannotated )
300+
301+
98302def get_str_literal (v : Value ) -> str | None :
99303 if isinstance (v , LoadLiteral ) and isinstance (v .value , str ):
100304 return v .value
101305 return None
102306
103307
308+ def get_max_prio (anns : list [Annotation ]) -> list [Annotation ]:
309+ max_prio = max (a .priority for a in anns )
310+ return [a for a in anns if a .priority == max_prio ]
311+
312+
104313def generate_html_report (sources : list [AnnotatedSource ]) -> str :
105314 html = []
106315 html .append ("<html>\n <head>\n " )
@@ -110,15 +319,17 @@ def generate_html_report(sources: list[AnnotatedSource]) -> str:
110319 for src in sources :
111320 html .append (f"<h2><tt>{ src .path } </tt></h2>\n " )
112321 html .append ("<pre>" )
113- anns = src .annotations
322+ src_anns = src .annotations
114323 with open (src .path ) as f :
115324 lines = f .readlines ()
116325 for i , s in enumerate (lines ):
117326 s = escape (s )
118327 line = i + 1
119328 linenum = "%5d" % line
120- if line in anns :
121- hint = " " .join (anns [line ])
329+ if line in src_anns :
330+ anns = get_max_prio (src_anns [line ])
331+ ann_strs = [a .message for a in anns ]
332+ hint = " " .join (ann_strs )
122333 s = colorize_line (linenum , s , hint_html = hint )
123334 else :
124335 s = linenum + " " + s
0 commit comments