Skip to content

Commit c0eb611

Browse files
authored
Merge pull request github#12244 from RasmusWL/import-refined
Python: Fix import of refined variable
2 parents 49d5149 + b2f34ef commit c0eb611

29 files changed

+645
-41
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Fixed module resolution so we allow imports of definitions that have had an attribute assigned to it, such as `class Foo; Foo.bar = 42`.

python/ql/lib/semmle/python/dataflow/new/internal/ImportResolution.qll

Lines changed: 75 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -65,31 +65,75 @@ private import semmle.python.dataflow.new.internal.DataFlowPrivate
6565
*/
6666
module ImportResolution {
6767
/**
68-
* Holds if the module `m` defines a name `name` by assigning `defn` to it. This is an
69-
* overapproximation, as `name` may not in fact be exported (e.g. by defining an `__all__` that does
70-
* not include `name`).
68+
* Holds if there is an ESSA step from `defFrom` to `defTo`, which should be allowed
69+
* for import resolution.
70+
*/
71+
private predicate allowedEssaImportStep(EssaDefinition defFrom, EssaDefinition defTo) {
72+
// to handle definitions guarded by if-then-else
73+
defFrom = defTo.(PhiFunction).getAnInput()
74+
or
75+
// refined variable
76+
// example: https://github.com/nvbn/thefuck/blob/ceeaeab94b5df5a4fe9d94d61e4f6b0bbea96378/thefuck/utils.py#L25-L45
77+
defFrom = defTo.(EssaNodeRefinement).getInput().getDefinition()
78+
}
79+
80+
/**
81+
* Holds if the module `m` defines a name `name` with the value `val`. The value
82+
* represents the value `name` will have at the end of the module (the last place we
83+
* have def-use flow to).
84+
*
85+
* Note: The handling of re-exporting imports is a bit simplistic. We assume that if
86+
* an import is made, it will be re-exported (which will not be the case if a new
87+
* value is assigned to the name, or it is deleted).
7188
*/
7289
pragma[nomagic]
73-
predicate module_export(Module m, string name, DataFlow::CfgNode defn) {
74-
exists(EssaVariable v, EssaDefinition essaDef |
75-
v.getName() = name and
76-
v.getAUse() = ImportStar::getStarImported*(m).getANormalExit() and
77-
(
78-
essaDef = v.getDefinition()
79-
or
80-
// to handle definitions guarded by if-then-else
81-
essaDef = v.getDefinition().(PhiFunction).getAnInput()
82-
)
90+
predicate module_export(Module m, string name, DataFlow::Node val) {
91+
// Definitions made inside `m` itself
92+
//
93+
// for code such as `foo = ...; foo.bar = ...` there will be TWO
94+
// EssaDefinition/EssaVariable. One for `foo = ...` (AssignmentDefinition) and one
95+
// for `foo.bar = ...`. The one for `foo.bar = ...` (EssaNodeRefinement). The
96+
// EssaNodeRefinement is the one that will reach the end of the module (normal
97+
// exit).
98+
//
99+
// However, we cannot just use the EssaNodeRefinement as the `val`, because the
100+
// normal data-flow depends on use-use flow, and use-use flow targets CFG nodes not
101+
// EssaNodes. So we need to go back from the EssaDefinition/EssaVariable that
102+
// reaches the end of the module, to the first definition of the variable, and then
103+
// track forwards using use-use flow to find a suitable CFG node that has flow into
104+
// it from use-use flow.
105+
exists(EssaVariable lastUseVar, EssaVariable firstDef |
106+
lastUseVar.getName() = name and
107+
// we ignore special variable $ introduced by our analysis (not used for anything)
108+
// we ignore special variable * introduced by `from <pkg> import *` -- TODO: understand why we even have this?
109+
not name in ["$", "*"] and
110+
lastUseVar.getAUse() = m.getANormalExit() and
111+
allowedEssaImportStep*(firstDef, lastUseVar) and
112+
not allowedEssaImportStep(_, firstDef)
83113
|
84-
defn.getNode() = essaDef.(AssignmentDefinition).getValue()
114+
not EssaFlow::defToFirstUse(firstDef, _) and
115+
val.asVar() = firstDef
85116
or
86-
defn.getNode() = essaDef.(ArgumentRefinement).getArgument()
117+
exists(ControlFlowNode mid, ControlFlowNode end |
118+
EssaFlow::defToFirstUse(firstDef, mid) and
119+
EssaFlow::useToNextUse*(mid, end) and
120+
not EssaFlow::useToNextUse(end, _) and
121+
val.asCfgNode() = end
122+
)
123+
)
124+
or
125+
// re-exports from `from <pkg> import *`
126+
exists(Module importedFrom |
127+
importedFrom = ImportStar::getStarImported(m) and
128+
module_export(importedFrom, name, val) and
129+
potential_module_export(importedFrom, name)
87130
)
88131
or
132+
// re-exports from `import <pkg>` or `from <pkg> import <stuff>`
89133
exists(Alias a |
90-
defn.asExpr() = [a.getValue(), a.getValue().(ImportMember).getModule()] and
134+
val.asExpr() = a.getValue() and
91135
a.getAsname().(Name).getId() = name and
92-
defn.getScope() = m
136+
val.getScope() = m
93137
)
94138
}
95139

@@ -263,9 +307,21 @@ module ImportResolution {
263307
module_reexport(reexporter, attr_name, m)
264308
)
265309
or
266-
// Submodules that are implicitly defined with relative imports of the form `from .foo import ...`.
267-
// In practice, we create a definition for each module in a package, even if it is not imported.
310+
// submodules of packages will be available as `<pkg>.<submodule>` after doing
311+
// `import <pkg>.<submodule>` at least once in the program, or can be directly
312+
// imported with `from <pkg> import <submodule>` (even with an empty
313+
// `<pkg>.__init__` file).
314+
//
315+
// Until an import of `<pkg>.<submodule>` is executed, it is technically possible
316+
// that `<pkg>.<submodule>` (or `from <pkg> import <submodule>`) can refer to an
317+
// attribute set in `<pkg>.__init__`.
318+
//
319+
// Therefore, if there is an attribute defined in `<pkg>.__init__` with the same
320+
// name as a submodule, we always consider that this attribute _could_ be a
321+
// reference to the submodule, even if we don't know that the submodule has been
322+
// imported yet.
268323
exists(string submodule, Module package |
324+
submodule = result.asVar().getName() and
269325
SsaSource::init_module_submodule_defn(result.asVar().getSourceVariable(),
270326
package.getEntryNode()) and
271327
m = getModuleFromName(package.getPackageName() + "." + submodule)

python/ql/test/experimental/dataflow/coverage-py2/argumentRoutingTest.expected

Whitespace-only changes.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../coverage/argumentRoutingTest.ql
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Python 2 specific tests, like the one in coverage/classes.py
2+
#
3+
# User-defined methods, both instance methods and class methods, can be called in many non-standard ways
4+
# i.e. differently from simply `c.f()` or `C.f()`. For example, a user-defined `__await__` method on a
5+
# class `C` will be called by the syntactic construct `await c` when `c` is an instance of `C`.
6+
#
7+
# These tests should cover all the class calls that we hope to support.
8+
# It is based on https://docs.python.org/3/reference/datamodel.html, and headings refer there.
9+
#
10+
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
11+
# This can be checked by running validTest.py.
12+
13+
import sys
14+
import os
15+
16+
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
17+
from testlib import expects
18+
19+
20+
def SINK1(x):
21+
pass
22+
23+
24+
def SINK2(x):
25+
pass
26+
27+
28+
def SINK3(x):
29+
pass
30+
31+
32+
def SINK4(x):
33+
pass
34+
35+
36+
def OK():
37+
print("OK")
38+
39+
40+
# 3.3.8. Emulating numeric types
41+
42+
# object.__index__(self)
43+
class With_index:
44+
def __index__(self):
45+
SINK1(self)
46+
OK() # Call not found
47+
return 0
48+
49+
50+
def test_index():
51+
import operator
52+
53+
with_index = With_index() #$ MISSING: arg1="SSA variable with_index" func=With_index.__index__
54+
operator.index(with_index)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
semmle-extractor-options: --max-import-depth=1 --lang=2

python/ql/test/experimental/dataflow/coverage-py3/argumentRoutingTest.expected

Whitespace-only changes.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../coverage/argumentRoutingTest.ql
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Python 3 specific tests, like the one in coverage/classes.py
2+
#
3+
# User-defined methods, both instance methods and class methods, can be called in many non-standard ways
4+
# i.e. differently from simply `c.f()` or `C.f()`. For example, a user-defined `__await__` method on a
5+
# class `C` will be called by the syntactic construct `await c` when `c` is an instance of `C`.
6+
#
7+
# These tests should cover all the class calls that we hope to support.
8+
# It is based on https://docs.python.org/3/reference/datamodel.html, and headings refer there.
9+
#
10+
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
11+
# This can be checked by running validTest.py.
12+
13+
import sys
14+
import os
15+
16+
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
17+
from testlib import expects
18+
19+
20+
def SINK1(x):
21+
pass
22+
23+
24+
def SINK2(x):
25+
pass
26+
27+
28+
def SINK3(x):
29+
pass
30+
31+
32+
def SINK4(x):
33+
pass
34+
35+
36+
def OK():
37+
print("OK")
38+
39+
40+
41+
# 3.3.7. Emulating container types
42+
43+
# object.__length_hint__(self)
44+
class With_length_hint:
45+
def __length_hint__(self):
46+
SINK1(self)
47+
OK()
48+
return 0
49+
50+
51+
def test_length_hint():
52+
import operator
53+
54+
with_length_hint = With_length_hint() #$ arg1="SSA variable with_length_hint" func=With_length_hint.__length_hint__
55+
operator.length_hint(with_length_hint)
56+
57+
58+
# 3.3.8. Emulating numeric types
59+
60+
# object.__index__(self)
61+
class With_index:
62+
def __index__(self):
63+
SINK1(self)
64+
OK() # Call not found
65+
return 0
66+
67+
68+
def test_index():
69+
import operator
70+
71+
with_index = With_index() #$ arg1="SSA variable with_index" func=With_index.__index__
72+
operator.index(with_index)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
semmle-extractor-options: --max-import-depth=1 --lang=3

0 commit comments

Comments
 (0)