1
+ /**
2
+ * INTERNAL. DO NOT USE.
3
+ *
4
+ * Provides predicates for resolving imports.
5
+ */
6
+
1
7
private import python
2
8
private import semmle.python.dataflow.new.DataFlow
3
9
private import semmle.python.dataflow.new.internal.ImportStar
4
10
private import semmle.python.dataflow.new.TypeTracker
5
11
12
+ /**
13
+ * Python modules and the way imports are resolved are... complicated. Here's a crash course in how
14
+ * it works, as well as some caveats to bear in mind when looking at the implementation in this
15
+ * module.
16
+ *
17
+ * First, let's consider the humble `import` statement:
18
+ * ```python
19
+ * import foo
20
+ * import bar.baz
21
+ * import ham.eggs as spam
22
+ * ```
23
+ *
24
+ * In the AST, all imports are aliased, as in the last import above. That is, `import foo` becomes
25
+ * `import foo as foo`, and `import bar.baz` becomes `import bar as bar`. Note that `import` is
26
+ * exclusively used to import modules -- if `eggs` is an attribute of the `ham` module (and not a
27
+ * submodule of the `ham` package), then the third line above is an error.
28
+ *
29
+ * Next, we have the `from` statement. This one is a bit more complicated, but still has the same
30
+ * aliasing desugaring as above applied to it. Thus, `from foo import bar` becomes
31
+ * `from foo import bar as bar`.
32
+ *
33
+ * In general, `from foo import bar` can mean two different things:
34
+ *
35
+ * 1. If `foo` is a module, and `bar` is an attribute of `foo`, then `from foo import bar` imports
36
+ * the attribute `bar` into the current module (binding it to the name `bar`).
37
+ * 2. If `foo` is a package, and `bar` is a submodule of `foo`, then `from foo import bar` first imports
38
+ * `foo.bar`, and then attempts to locate the `bar` attribute again. In most cases, that attribute
39
+ * will then point to the `bar` submodule.
40
+ *
41
+ * Now, when it comes to how these imports are represented in the AST, things get a bit complicated.
42
+ * First of all, both of the above forms of imports get mapped to the same kind of AST node:
43
+ * `Import`. An `Import` node has a sequence of names, each of which is an `Alias` node. This `Alias`
44
+ * node represents the `x as y` bit of each imported module.
45
+ *
46
+ * The same is true for `from` imports. So, how then do we distinguish between the two forms of
47
+ * imports? The distinguishing feature is the left hand side of the `as` node. If the left hand side
48
+ * is an `ImportExpr`, then it is a plain import. If it is an `ImportMember`, then it is a `from`
49
+ * import. (And to confuse matters even more, this `ImportMember` contains another `ImportExpr` for
50
+ * the bit between the `from` and `import` keywords.)
51
+ *
52
+ * Caveats:
53
+ *
54
+ * - A relative import of the form `from .foo import bar as baz` not only imports `bar` and binds it
55
+ * to the name `baz`, but also imports `foo` and binds it to the name `foo`. This only happens with
56
+ * relative imports. `from foo import bar as baz` only binds `bar` to `baz`.
57
+ * - Modules may also be packages, so e.g. `import foo.bar` may import the `bar` submodule in the `foo`
58
+ * package, or the `bar` subpackage of the `foo` package. The practical difference here is the name of
59
+ * the module that is imported, as the package `foo.bar` will have the "name" `foo.bar.__init__`,
60
+ * corresponding to the fact that the code that is executed is in the `__init__.py` file of the
61
+ * `bar` package.
62
+ */
6
63
module ImportResolution {
7
64
/**
8
65
* Holds if the module `m` defines a name `name` by assigning `defn` to it. This is an
9
66
* overapproximation, as `name` may not in fact be exported (e.g. by defining an `__all__` that does
10
67
* not include `name`).
11
68
*/
69
+ pragma [ nomagic]
12
70
predicate module_export ( Module m , string name , DataFlow:: CfgNode defn ) {
13
71
exists ( EssaVariable v |
14
72
v .getName ( ) = name and
@@ -18,12 +76,216 @@ module ImportResolution {
18
76
or
19
77
defn .getNode ( ) = v .getDefinition ( ) .( ArgumentRefinement ) .getArgument ( )
20
78
)
79
+ or
80
+ exists ( Alias a |
81
+ defn .asExpr ( ) = [ a .getValue ( ) , a .getValue ( ) .( ImportMember ) .getModule ( ) ] and
82
+ a .getAsname ( ) .( Name ) .getId ( ) = name and
83
+ defn .getScope ( ) = m
84
+ )
85
+ }
86
+
87
/**
 * Holds if the module `m` contains a definition of `__all__` whose assigned value is a
 * sequence that has the string constant `name` as one of its elements.
 *
 * Only this statically recognisable shape is handled.
 */
private predicate all_mentions_name(Module m, string name) {
  exists(DefinitionNode all_def, SequenceNode exported, StrConst entry |
    all_def.getScope() = m and
    all_def.(NameNode).getId() = "__all__" and
    exported = all_def.getValue() and
    entry = exported.getAnElement().getNode() and
    entry.getText() = name
  )
}
99
+
100
/**
 * Holds if the module `m` has no definition of `__all__`, or uses `__all__` in a way that is
 * not a plain assignment of a sequence. In both situations callers treat the module as if it
 * had no usable `__all__`.
 */
private predicate no_or_complicated_all(Module m) {
  // Case 1: nothing in `m` defines `__all__`.
  not exists(DefinitionNode all_def |
    all_def.(NameNode).getId() = "__all__" and
    all_def.getScope() = m
  )
  or
  // Case 2: some definition of `__all__` in `m` assigns something other than a sequence.
  exists(DefinitionNode all_def |
    all_def.getScope() = m and
    all_def.(NameNode).getId() = "__all__" and
    not all_def.getValue() instanceof SequenceNode
  )
  or
  // Case 3: `__all__` is read somewhere in `m`. A read usually indicates in-place mutation
  // (for instance via `append` or `extend`), which is not modelled here.
  exists(NameNode all_use |
    all_use.getScope() = m and
    all_use.isLoad() and
    all_use.getId() = "__all__"
  )
}
120
+
121
/**
 * Holds if `name` may be exported by the module `m`.
 *
 * This is the case when `__all__` statically lists `name`, or when `__all__` is absent or too
 * complicated and either
 * - `name` occurs as a name in the scope of `m` and does not start with an underscore, or
 * - `name` is the `as` name of an import alias whose value is in the scope of `m`.
 */
private predicate potential_module_export(Module m, string name) {
  all_mentions_name(m, name)
  or
  no_or_complicated_all(m) and
  (
    name.charAt(0) != "_" and
    exists(NameNode occurrence | occurrence.getScope() = m and occurrence.getId() = name)
    or
    exists(Alias alias |
      alias.getValue().getScope() = m and
      alias.getAsname().(Name).getId() = name
    )
  )
}
131
+
132
/**
 * Holds if the module `reexporter` makes the module `reexported` available under the name
 * `reexported_name`: an immediate reference to `reexported` is what `reexporter` defines
 * `reexported_name` as, and that name is potentially exported by `reexporter`.
 */
private predicate module_reexport(Module reexporter, string reexported_name, Module reexported) {
  potential_module_export(reexporter, reexported_name) and
  module_export(reexporter, reexported_name, getImmediateModuleReference(reexported))
}
143
+
144
/**
 * Gets a reference to `sys.modules`, recognised syntactically as an attribute reference named
 * `modules` whose object is the plain name `sys`.
 */
private DataFlow::Node sys_modules_reference() {
  exists(DataFlow::AttrRef attr, Name base |
    base = attr.getObject().asExpr() and
    base.getId() = "sys" and
    attr.getAttributeName() = "modules" and
    result = attr
  )
}
153
+
154
/**
 * Gets a module that may have been added to `sys.modules` under the key `name`, that is, a
 * module referenced by the value stored in `sys.modules["name"] = ...`.
 */
private Module sys_modules_module_with_name(string name) {
  exists(SubscriptNode store, StrConst key, DataFlow::Node stored |
    // The subscripted object is `sys.modules` ...
    store.getObject() = sys_modules_reference().asCfgNode() and
    // ... indexed by the string constant `name` ...
    key = store.getIndex().getNode() and
    key.getText() = name and
    // ... and the value being stored is a reference to the resulting module.
    stored.asCfgNode() = store.(DefinitionNode).getValue() and
    stored = getModuleReference(result)
  )
}
22
166
23
167
/**
 * Gets the module scope of a module value that `node` points to, according to the points-to
 * analysis.
 */
Module getModule(DataFlow::CfgNode node) {
  exists(ModuleValue mv |
    node.getNode().pointsTo(mv) and
    result = mv.getScope()
  )
}
171
/**
 * Gets a module whose file is the preferred one (per `isPreferredModuleForName`) for the
 * module name imported by the import-star `i`.
 */
Module getModuleImportedByImportStar(ImportStar i) {
  exists(string imported_name | imported_name = i.getImportedModuleName() |
    isPreferredModuleForName(result.getFile(), imported_name)
  )
}
174
+
175
/**
 * Gets a data-flow node that may be a reference to a module with the name `module_name`.
 *
 * The name is derived purely from the import syntax; no check is made here that a module
 * with that name exists.
 */
DataFlow::Node getReferenceToModuleName(string module_name) {
  // Plain `import` statements: the node is the `as` name of the alias, e.g.
  //   import foo               # implicitly `import foo as foo`
  //   import foo as foo_alias
  exists(Alias alias |
    alias = any(Import imp).getAName() and
    module_name = alias.getValue().(ImportExpr).getImportedModuleName() and
    result.asExpr() = alias.getAsname()
  )
  or
  // The module part of a `from ... import ...` statement, e.g. the `..foo.bar` in
  //   from ..foo.bar import baz   # ..foo.bar might point to, say, package.subpackage.foo.bar
  exists(ImportExpr source |
    source = any(ImportMember member).getModule() and
    result.asExpr() = source and
    module_name = source.getImportedModuleName()
  )
  or
  // Modules (not attributes) imported via `from ... import ...` statements, e.g.
  //   from foo.bar import baz               # imports foo.bar.baz as baz
  //   from foo.bar import baz as baz_alias  # imports foo.bar.baz as baz_alias
  exists(Import from_import, Alias alias, ImportMember member |
    from_import.isFromImport() and
    alias = from_import.getAName() and
    member = alias.getValue() and
    result.asExpr() = alias.getAsname() and
    module_name = member.getModule().(ImportExpr).getImportedModuleName() + "." + member.getName()
  )
  or
  // For parity with the points-to based solution, the `ImportExpr` and `ImportMember` parts of
  // the cases above should _also_ refer to the right modules.
  result.asExpr().(ImportExpr).getImportedModuleName() = module_name
  or
  exists(ImportMember member | result.asExpr() = member |
    member.getModule().(ImportExpr).getImportedModuleName() = module_name
    or
    // This alternative is switched off by the trailing `none()`.
    member.getModule().(ImportExpr).getImportedModuleName() + "." + member.getName() = module_name and
    none()
  )
}
212
+
213
/**
 * Gets a dataflow node that is an immediate reference to the module `m`.
 *
 * A node is an immediate reference if it is
 * - a syntactic reference to a module name (see `getReferenceToModuleName`) that resolves to
 *   `m`, either by file or through an assignment into `sys.modules`,
 * - an attribute read on a reference to a module `p`, where the attribute names a submodule
 *   of `p` or a module reexported by `p`, or
 * - the variable created for a submodule of a package by
 *   `SsaSource::init_module_submodule_defn`.
 */
DataFlow::Node getImmediateModuleReference(Module m) {
  exists(string module_name | result = getReferenceToModuleName(module_name) |
    // Depending on whether the referenced module is a package or not, we may need to add a
    // trailing `.__init__` to the module name.
    isPreferredModuleForName(m.getFile(), module_name + ["", ".__init__"])
    or
    // Module defined via `sys.modules`
    m = sys_modules_module_with_name(module_name)
  )
  or
  // Reading an attribute on a module may return a submodule (or subpackage).
  exists(DataFlow::AttrRead ar, Module p, string attr_name |
    ar.getObject() = getModuleReference(p) and
    // Only attribute names that are the stem of some module's file are considered.
    attr_name = any(Module m0).getFile().getStem() and
    ar.getAttributeName() = attr_name and
    result = ar
  |
    isPreferredModuleForName(m.getFile(), p.getPackageName() + "." + attr_name + ["", ".__init__"])
    or
    // This is also true for attributes that come from reexports.
    module_reexport(p, attr_name, m)
  )
  or
  // Submodules that are implicitly defined when importing via `from ... import ...` statements.
  // In practice, we create a definition for each module in a package, even if it is not imported.
  exists(string submodule, Module package |
    // Tie `submodule` to the variable that `result` stands for. String concatenation cannot
    // bind its operands, so without this conjunct `submodule` would be left unbound.
    submodule = result.asVar().getName() and
    SsaSource::init_module_submodule_defn(result.asVar().getSourceVariable(),
      package.getEntryNode()) and
    isPreferredModuleForName(m.getFile(),
      package.getPackageName() + "." + submodule + ["", ".__init__"])
  )
}
246
+
247
/**
 * Join-order helper for `getModuleReference`.
 *
 * Holds if `node` is an immediate reference to the module `m`, is the name `name`, and lies
 * in the scope `s`.
 */
pragma[nomagic]
private predicate module_name_in_scope(DataFlow::Node node, Scope s, string name, Module m) {
  pragma[only_bind_into](node) = getImmediateModuleReference(pragma[only_bind_into](m)) and
  exists(Name id | id = node.asExpr() | id.getId() = name) and
  s = node.getScope()
}
254
+
255
/**
 * Join-order helper for `getModuleReference`.
 *
 * Holds if `node` is a use of the name `name` located in the scope `s`.
 */
pragma[nomagic]
private predicate module_reference_in_scope(DataFlow::Node node, Scope s, string name) {
  exists(Name use |
    use = node.asExpr() and
    pragma[only_bind_into](use).isUse() and
    use.getId() = name
  ) and
  s = node.getScope()
}
264
+
265
/**
 * Gets a reference to the module `m`. Besides immediate references, this follows local flow,
 * flow through module variables, and uses of a name that is bound to the module in an
 * enclosing scope.
 */
DataFlow::Node getModuleReference(Module m) {
  // Immediate references to the module.
  result = getImmediateModuleReference(m)
  or
  // Local flow from an earlier reference.
  DataFlow::localFlow(getModuleReference(m), result)
  or
  // Global flow: an earlier reference is written to a module variable that is read here.
  exists(DataFlow::ModuleVariableNode global |
    global.getAWrite() = getModuleReference(m) and
    global.getARead() = result
  )
  or
  // A use of a name that is bound to the module in the same or an enclosing scope.
  exists(DataFlow::Node binding, Scope binding_scope, Scope reading_scope, string bound_name |
    module_name_in_scope(pragma[only_bind_into](binding), pragma[only_bind_into](binding_scope),
      pragma[only_bind_into](bound_name), pragma[only_bind_into](m)) and
    module_reference_in_scope(result, reading_scope, bound_name) and
    reading_scope.getEnclosingScope*() = binding_scope
  )
}
290
+
29
291
}
0 commit comments