Skip to content

Commit b540eb0

Browse files
committed
Python: Various small fixes
- Swaps `module_reference_in_scope` and `module_name_in_scope`. - uses `AttrRead::accesses` instead of `getObject`, etc. - Removes an errant `none()`. - Expands the QLDoc for some of the predicates.
1 parent 7f79043 commit b540eb0

File tree

1 file changed

+28
-16
lines changed

1 file changed

+28
-16
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/ImportResolution.qll

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,11 @@ private import semmle.python.dataflow.new.TypeTracker
3434
*
3535
* 1. If `foo` is a module, and `bar` is an attribute of `foo`, then `from foo import bar` imports
3636
* the attribute `bar` into the current module (binding it to the name `bar`).
37-
* 2. If `foo` is a package, and `bar` is a submodule of `foo`, then `from foo import bar` first imports
38-
* `foo.bar`, and then reads the `bar` attribute on `foo`. In most cases, that attribute
39-
* will then point to the `bar` submodule.
37+
* 2. If `foo` is a package, and `bar` is already defined in `foo/__init__.py`,
38+
* that value will be imported. If it is not defined, and `bar` is a submodule of `foo`, then
39+
* `bar` is imported to `foo`, and the `bar` submodule imported.
40+
* Note: We don't currently model if the attribute is already defined in `__init__.py`
41+
* and always assume that the submodule will be used.
4042
*
4143
* Now, when it comes to how these imports are represented in the AST, things get a bit complicated.
4244
* First of all, both of the above forms of imports get mapped to the same kind of AST node:
@@ -58,7 +60,7 @@ private import semmle.python.dataflow.new.TypeTracker
5860
* package, or the `bar` subpackage of the `foo` package. The practical difference here is the name of
5961
* the module that is imported, as the package `foo.bar` will have the "name" `foo.bar.__init__`,
6062
* corresponding to the fact that the code that is executed is in the `__init__.py` file of the
61-
* `bar` package.
63+
* `bar` subpackage.
6264
*/
6365
module ImportResolution {
6466
/**
@@ -168,8 +170,17 @@ module ImportResolution {
168170
isPreferredModuleForName(result.getFile(), i.getImportedModuleName())
169171
}
170172

171-
/** Gets a data-flow node that may be a reference to a module with the name `module_name`. */
172-
DataFlow::Node getReferenceToModuleName(string module_name) {
173+
/**
174+
* Gets a data-flow node that may be a reference to a module with the name `module_name`.
175+
*
176+
* This is a helper predicate for `getImmediateModuleReference`. It captures the fact that in an
177+
* import such as `import foo`,
178+
* - `foo` may simply be the name of a module, or
179+
* - `foo` may be the name of a package (in which case its name is actually `foo.__init__`), or
180+
* - `foo` may be a module name that has been added to `sys.modules` (in which case its actual name can
181+
* be anything, for instance `os.path` is either `posixpath` or `ntpath`).
182+
*/
183+
private DataFlow::Node getReferenceToModuleName(string module_name) {
173184
// Regular import statements, e.g.
174185
// import foo # implicitly `import foo as foo`
175186
// import foo as foo_alias
@@ -188,7 +199,6 @@ module ImportResolution {
188199
// from foo.bar import baz # imports foo.bar.baz as baz
189200
// from foo.bar import baz as baz_alias # imports foo.bar.baz as baz_alias
190201
exists(Import i, Alias a, ImportMember im | a = i.getAName() and im = a.getValue() |
191-
i.isFromImport() and
192202
result.asExpr() = a.getAsname() and
193203
module_name = im.getModule().(ImportExpr).getImportedModuleName() + "." + im.getName()
194204
)
@@ -201,12 +211,15 @@ module ImportResolution {
201211
any(ImportMember i |
202212
i.getModule().(ImportExpr).getImportedModuleName() = module_name
203213
or
204-
i.getModule().(ImportExpr).getImportedModuleName() + "." + i.getName() = module_name and
205-
none()
214+
i.getModule().(ImportExpr).getImportedModuleName() + "." + i.getName() = module_name
206215
)
207216
}
208217

209-
/** Gets a dataflow node that is an immediate reference to the module `m`. */
218+
/**
219+
* Gets a dataflow node that is an immediate reference to the module `m`.
220+
*
221+
* Because of attribute lookups, this is mutually recursive with `getModuleReference`.
222+
*/
210223
DataFlow::Node getImmediateModuleReference(Module m) {
211224
exists(string module_name | result = getReferenceToModuleName(module_name) |
212225
// Depending on whether the referenced module is a package or not, we may need to add a
@@ -219,9 +232,8 @@ module ImportResolution {
219232
or
220233
// Reading an attribute on a module may return a submodule (or subpackage).
221234
exists(DataFlow::AttrRead ar, Module p, string attr_name |
222-
ar.getObject() = getModuleReference(p) and
235+
ar.accesses(getModuleReference(p), attr_name) and
223236
attr_name = any(Module m0).getFile().getStem() and
224-
ar.getAttributeName() = attr_name and
225237
result = ar
226238
|
227239
isPreferredModuleForName(m.getFile(), p.getPackageName() + "." + attr_name + ["", ".__init__"])
@@ -242,15 +254,15 @@ module ImportResolution {
242254

243255
/** Join-order helper for `getModuleReference`. */
244256
pragma[nomagic]
245-
private predicate module_name_in_scope(DataFlow::Node node, Scope s, string name, Module m) {
257+
private predicate module_reference_in_scope(DataFlow::Node node, Scope s, string name, Module m) {
246258
node.getScope() = s and
247259
node.asExpr().(Name).getId() = name and
248260
pragma[only_bind_into](node) = getImmediateModuleReference(pragma[only_bind_into](m))
249261
}
250262

251263
/** Join-order helper for `getModuleReference`. */
252264
pragma[nomagic]
253-
private predicate module_reference_in_scope(DataFlow::Node node, Scope s, string name) {
265+
private predicate module_name_in_scope(DataFlow::Node node, Scope s, string name) {
254266
node.getScope() = s and
255267
exists(Name n | n = node.asExpr() |
256268
n.getId() = name and
@@ -277,9 +289,9 @@ module ImportResolution {
277289
or
278290
// A reference to a name that is bound to a module in an enclosing scope.
279291
exists(DataFlow::Node def, Scope def_scope, Scope use_scope, string name |
280-
module_name_in_scope(pragma[only_bind_into](def), pragma[only_bind_into](def_scope),
292+
module_reference_in_scope(pragma[only_bind_into](def), pragma[only_bind_into](def_scope),
281293
pragma[only_bind_into](name), pragma[only_bind_into](m)) and
282-
module_reference_in_scope(result, use_scope, name) and
294+
module_name_in_scope(result, use_scope, name) and
283295
use_scope.getEnclosingScope*() = def_scope
284296
)
285297
}

0 commit comments

Comments
 (0)