1
+ /**
2
+ * INTERNAL. DO NOT USE.
3
+ *
4
+ * Provides predicates for resolving imports.
5
+ */
6
+
1
7
private import python
2
8
private import semmle.python.dataflow.new.DataFlow
3
9
private import semmle.python.dataflow.new.internal.ImportStar
4
10
private import semmle.python.dataflow.new.TypeTracker
11
+ private import semmle.python.dataflow.new.internal.DataFlowPrivate
5
12
13
+ /**
14
+ * Python modules and the way imports are resolved are... complicated. Here's a crash course in how
15
+ * it works, as well as some caveats to bear in mind when looking at the implementation in this
16
+ * module.
17
+ *
18
+ * First, let's consider the humble `import` statement:
19
+ * ```python
20
+ * import foo
21
+ * import bar.baz
22
+ * import ham.eggs as spam
23
+ * ```
24
+ *
25
+ * In the AST, all imports are aliased, as in the last import above. That is, `import foo` becomes
26
+ * `import foo as foo`, and `import bar.baz` becomes `import bar as bar`. Note that `import` is
27
+ * exclusively used to import modules -- if `eggs` is an attribute of the `ham` module (and not a
28
+ * submodule of the `ham` package), then the third line above is an error.
29
+ *
30
+ * Next, we have the `from` statement. This one is a bit more complicated, but still has the same
31
+ * aliasing desugaring as above applied to it. Thus, `from foo import bar` becomes
32
+ * `from foo import bar as bar`.
33
+ *
34
+ * In general, `from foo import bar` can mean two different things:
35
+ *
36
+ * 1. If `foo` is a module, and `bar` is an attribute of `foo`, then `from foo import bar` imports
37
+ * the attribute `bar` into the current module (binding it to the name `bar`).
38
+ * 2. If `foo` is a package, and `bar` is already defined in `foo/__init__.py`,
39
+ * that value will be imported. If it is not defined, and `bar` is a submodule of `foo`, then
40
+ * the `bar` submodule is imported, stored as the attribute `bar` of `foo`, and bound to the name `bar`.
41
+ * Note: We don't currently model if the attribute is already defined in `__init__.py`
42
+ * and always assume that the submodule will be used.
43
+ *
44
+ * Now, when it comes to how these imports are represented in the AST, things get a bit complicated.
45
+ * First of all, both of the above forms of imports get mapped to the same kind of AST node:
46
+ * `Import`. An `Import` node has a sequence of names, each of which is an `Alias` node. This `Alias`
47
+ * node represents the `x as y` bit of each imported module.
48
+ *
49
+ * The same is true for `from` imports. So, how then do we distinguish between the two forms of
50
+ * imports? The distinguishing feature is the left hand side of the `as` node. If the left hand side
51
+ * is an `ImportExpr`, then it is a plain import. If it is an `ImportMember`, then it is a `from`
52
+ * import. (And to confuse matters even more, this `ImportMember` contains another `ImportExpr` for
53
+ * the bit between the `from` and `import` keywords.)
54
+ *
55
+ * Caveats:
56
+ *
57
+ * - A relative import of the form `from .foo import bar as baz` not only imports `bar` and binds it
58
+ * to the name `baz`, but also imports `foo` and binds it to the name `foo`. This only happens with
59
+ * relative imports. `from foo import bar as baz` only binds `bar` to `baz`.
60
+ * - Modules may also be packages, so e.g. `import foo.bar` may import the `bar` submodule in the `foo`
61
+ * package, or the `bar` subpackage of the `foo` package. The practical difference here is the name of
62
+ * the module that is imported, as the package `foo.bar` will have the "name" `foo.bar.__init__`,
63
+ * corresponding to the fact that the code that is executed is in the `__init__.py` file of the
64
+ * `bar` subpackage.
65
+ */
6
66
module ImportResolution {
7
67
/**
8
68
* Holds if the module `m` defines a name `name` by assigning `defn` to it. This is an
9
69
* overapproximation, as `name` may not in fact be exported (e.g. by defining an `__all__` that does
10
70
* not include `name`).
11
71
*/
72
+ pragma [ nomagic]
12
73
predicate module_export ( Module m , string name , DataFlow:: CfgNode defn ) {
13
74
exists ( EssaVariable v |
14
75
v .getName ( ) = name and
@@ -18,12 +79,223 @@ module ImportResolution {
18
79
or
19
80
defn .getNode ( ) = v .getDefinition ( ) .( ArgumentRefinement ) .getArgument ( )
20
81
)
82
+ or
83
+ exists ( Alias a |
84
+ defn .asExpr ( ) = [ a .getValue ( ) , a .getValue ( ) .( ImportMember ) .getModule ( ) ] and
85
+ a .getAsname ( ) .( Name ) .getId ( ) = name and
86
+ defn .getScope ( ) = m
87
+ )
21
88
}
22
89
23
- Module getModule ( DataFlow:: CfgNode node ) {
24
- exists ( ModuleValue mv |
25
- node .getNode ( ) .pointsTo ( mv ) and
26
- result = mv .getScope ( )
90
/**
 * Holds if `name` is the text of a string constant that is an element of a sequence assigned
 * to `__all__` in the module `m`, i.e. `m` explicitly lists `name` as exported.
 *
 * Only definitions of `__all__` whose value is a `SequenceNode` are recognized.
 */
private predicate all_mentions_name(Module m, string name) {
  exists(DefinitionNode allDef, SequenceNode exported, StrConst entry |
    allDef.getScope() = m and
    allDef.(NameNode).getId() = "__all__" and
    exported = allDef.getValue() and
    entry = exported.getAnElement().getNode() and
    entry.getText() = name
  )
}
102
+
103
/**
 * Holds if the exports of the module `m` cannot be read off a simple `__all__` assignment.
 *
 * This is the case when `m` never defines `__all__` (so it implicitly exports every name not
 * starting with an underscore), or when `__all__` is handled in a way that is too complicated
 * to model (in which case we _also_ pretend that all such names are exported).
 */
private predicate no_or_complicated_all(Module m) {
  // Case 1: no definition in `m` is a definition of `__all__`.
  forall(DefinitionNode anyDef | anyDef.getScope() = m |
    not anyDef.(NameNode).getId() = "__all__"
  )
  or
  // Case 2: `__all__` is assigned something that is not a sequence.
  exists(DefinitionNode allDef |
    allDef.getScope() = m and
    allDef.(NameNode).getId() = "__all__" and
    not allDef.getValue() instanceof SequenceNode
  )
  or
  // Case 3: `__all__` is read somewhere in `m`. This usually means it is being mutated through
  // `append` or `extend`, which we don't handle.
  exists(NameNode allRead |
    allRead.getScope() = m and
    allRead.isLoad() and
    allRead.getId() = "__all__"
  )
}
123
+
124
/**
 * Holds if the module `m` may export the name `name`.
 *
 * Either `name` is listed in a simple `__all__` assignment, or `__all__` is absent or too
 * complicated to model and `name` is a non-underscore-prefixed name occurring in `m` or the
 * `as`-name of an import alias located in `m`.
 */
private predicate potential_module_export(Module m, string name) {
  // Explicitly listed in `__all__`.
  all_mentions_name(m, name)
  or
  // Otherwise, fall back to the implicit export rules.
  no_or_complicated_all(m) and
  (
    // A name occurring in the module that does not start with an underscore.
    exists(NameNode occurrence |
      occurrence.getScope() = m and
      occurrence.getId() = name and
      name.charAt(0) != "_"
    )
    or
    // The name bound by an import alias whose imported value lives in the module.
    exists(Alias alias |
      alias.getValue().getScope() = m and
      alias.getAsname().(Name).getId() = name
    )
  )
}
134
+
135
/**
 * Holds if `reexporter` makes the module `reexported` available under the name
 * `reexported_name`: an immediate reference to `reexported` is what `reexporter` defines as
 * `reexported_name`, and that name is potentially exported from `reexporter`.
 */
private predicate module_reexport(Module reexporter, string reexported_name, Module reexported) {
  potential_module_export(reexporter, reexported_name) and
  module_export(reexporter, reexported_name, getImmediateModuleReference(reexported))
}
146
+
147
/**
 * Gets a reference to `sys.modules`.
 *
 * This is a purely syntactic match: any reference to an attribute called `modules` on an
 * expression that is the plain name `sys`.
 */
private DataFlow::Node sys_modules_reference() {
  exists(DataFlow::AttrRef modulesAttr, Name sysName |
    sysName.getId() = "sys" and
    modulesAttr.getObject().asExpr() = sysName and
    modulesAttr.getAttributeName() = "modules" and
    result = modulesAttr
  )
}
156
+
157
/**
 * Gets a module that may have been added to `sys.modules` under the key `name`, that is, a
 * module for which there is a store of the form `sys.modules["name"] = <reference to module>`.
 */
private Module sys_modules_module_with_name(string name) {
  exists(SubscriptNode store, DataFlow::Node stored |
    // The subscripted object is `sys.modules` ...
    store.getObject() = sys_modules_reference().asCfgNode() and
    // ... indexed by the string constant `name` ...
    store.getIndex().getNode().(StrConst).getText() = name and
    // ... and the value being stored is a reference to the resulting module.
    store.(DefinitionNode).getValue() = stored.asCfgNode() and
    stored = getModuleReference(result)
  )
}
169
+
170
/**
 * Gets the module imported by the `from ... import *` statement `i`: the module whose file is
 * the preferred one for the module name that `i` imports.
 */
Module getModuleImportedByImportStar(ImportStar i) {
  exists(string imported | imported = i.getImportedModuleName() |
    isPreferredModuleForName(result.getFile(), imported)
  )
}
173
+
174
/**
 * Gets a data-flow node that may refer to a module named `module_name`.
 *
 * Helper for `getImmediateModuleReference`. The name is taken as written in the import, so for
 * an import such as `import foo`,
 * - `foo` may simply be the name of a module, or
 * - `foo` may be the name of a package (in which case its name is actually `foo.__init__`), or
 * - `foo` may be a module name that has been added to `sys.modules` (in which case its actual
 *   name can be anything, for instance `os.path` is either `posixpath` or `ntpath`).
 */
private DataFlow::Node getReferenceToModuleName(string module_name) {
  // The name bound by a regular import statement, e.g.
  //   import foo              # implicitly `import foo as foo`
  //   import foo as foo_alias
  exists(Alias alias, ImportExpr imported |
    alias = any(Import stmt).getAName() and
    imported = alias.getValue() and
    module_name = imported.getImportedModuleName() and
    result.asExpr() = alias.getAsname()
  )
  or
  // The module part of a `from ... import ...` statement, e.g. the `..foo.bar` in
  //   from ..foo.bar import baz  # ..foo.bar might point to, say, package.subpackage.foo.bar
  exists(ImportMember member, ImportExpr source |
    source = member.getModule() and
    module_name = source.getImportedModuleName() and
    result.asExpr() = source
  )
  or
  // The name bound by a `from ... import ...` statement that imports a module (not an
  // attribute), e.g.
  //   from foo.bar import baz               # imports foo.bar.baz as baz
  //   from foo.bar import baz as baz_alias  # imports foo.bar.baz as baz_alias
  exists(Alias alias, ImportMember member, ImportExpr source |
    alias = any(Import stmt).getAName() and
    member = alias.getValue() and
    source = member.getModule() and
    module_name = source.getImportedModuleName() + "." + member.getName() and
    result.asExpr() = alias.getAsname()
  )
  or
  // For parity with the points-to based solution, the `ImportExpr` and `ImportMember` bits of
  // the above cases should _also_ point to the right modules.
  exists(ImportExpr imported |
    imported.getImportedModuleName() = module_name and
    result.asExpr() = imported
  )
  or
  exists(ImportMember member |
    member.getModule().(ImportExpr).getImportedModuleName() + "." + member.getName() =
      module_name and
    result.asExpr() = member
  )
}
216
+
217
/**
 * Gets a dataflow node that is an immediate reference to the module `m`.
 *
 * A node is an immediate reference if it
 * - is an import-related node for a name that resolves to `m` (directly, as a package via
 *   `.__init__`, or through a `sys.modules` store),
 * - reads an attribute on a module reference, where the attribute names a submodule or a
 *   re-exported module, or
 * - is the implicit definition of a submodule variable in a package's `__init__` module.
 *
 * Because of attribute lookups, this is mutually recursive with `getModuleReference`.
 */
DataFlow::Node getImmediateModuleReference(Module m) {
  exists(string module_name | result = getReferenceToModuleName(module_name) |
    // Depending on whether the referenced module is a package or not, we may need to add a
    // trailing `.__init__` to the module name.
    isPreferredModuleForName(m.getFile(), module_name + ["", ".__init__"])
    or
    // Module defined via `sys.modules`
    m = sys_modules_module_with_name(module_name)
  )
  or
  // Reading an attribute on a module may return a submodule (or subpackage).
  exists(DataFlow::AttrRead ar, Module p, string attr_name |
    ar.accesses(getModuleReference(p), attr_name) and
    result = ar
  |
    isPreferredModuleForName(m.getFile(), p.getPackageName() + "." + attr_name + ["", ".__init__"])
  )
  or
  // This is also true for attributes that come from reexports.
  exists(Module reexporter, string attr_name |
    result.(DataFlow::AttrRead).accesses(getModuleReference(reexporter), attr_name) and
    module_reexport(reexporter, attr_name, m)
  )
  or
  // Submodules that are implicitly defined with relative imports of the form `from .foo import ...`.
  // In practice, we create a definition for each module in a package, even if it is not imported.
  exists(string submodule, Module package |
    // Tie the submodule name to the variable being defined. Without this, `submodule` is
    // unconstrained and every implicit submodule definition in the package would be treated
    // as a reference to every module located under that package.
    submodule = result.asVar().getName() and
    SsaSource::init_module_submodule_defn(result.asVar().getSourceVariable(),
      package.getEntryNode()) and
    isPreferredModuleForName(m.getFile(),
      package.getPackageName() + "." + submodule + ["", ".__init__"])
  )
}
255
+
256
/**
 * Join-order helper for `getModuleReference`.
 *
 * Holds if `node` is an immediate reference to the module `m`, and `node` is an expression
 * that is the name `name` located in the scope `s`.
 */
pragma[nomagic]
private predicate module_reference_in_scope(DataFlow::Node node, Scope s, string name, Module m) {
  pragma[only_bind_into](node) = getImmediateModuleReference(pragma[only_bind_into](m)) and
  exists(Name id | id = node.asExpr() | id.getId() = name) and
  s = node.getScope()
}
263
+
264
/**
 * Join-order helper for `getModuleReference`.
 *
 * Holds if `node` is an expression that is a use of the name `name`, located in the scope `s`.
 */
pragma[nomagic]
private predicate module_name_in_scope(DataFlow::Node node, Scope s, string name) {
  exists(Name use | use = node.asExpr() and use.getId() = name |
    pragma[only_bind_into](use).isUse()
  ) and
  s = node.getScope()
}
273
+
274
/**
 * Gets a reference to the module `m`, either immediately or by following certain kinds of
 * local and global flow from an earlier reference.
 */
DataFlow::Node getModuleReference(Module m) {
  // Base case: immediate references to the module.
  result = getImmediateModuleReference(m)
  or
  // Recursive case: flow (local or global) forward from an earlier reference to the module.
  exists(DataFlow::Node earlier | earlier = getModuleReference(m) |
    // A local flow step.
    simpleLocalFlowStepForTypetracking(earlier, result)
    or
    // A write to a module-level variable, followed by a read of that same variable.
    exists(DataFlow::ModuleVariableNode global |
      earlier = global.getAWrite() and
      result = global.getARead()
    )
  )
  or
  // A use of a name that is bound to the module in the same or an enclosing scope.
  exists(DataFlow::Node binding, Scope binding_scope, Scope use_scope, string bound_name |
    module_reference_in_scope(pragma[only_bind_into](binding),
      pragma[only_bind_into](binding_scope), pragma[only_bind_into](bound_name),
      pragma[only_bind_into](m)) and
    module_name_in_scope(result, use_scope, bound_name) and
    binding_scope = use_scope.getEnclosingScope*()
  )
}
299
+
300
/** Gets a module that the control-flow-based data-flow node `node` is a reference to. */
Module getModule(DataFlow::CfgNode node) {
  exists(Module referenced | node = getModuleReference(referenced) | result = referenced)
}
29
301
}
0 commit comments