Skip to content

Commit 75c4d6a

Browse files
authored
Merge pull request #6650 from yoff/python-dataflow/init-time
Python: Import time dataflow
2 parents 43f7eed + f34d1ee commit 75c4d6a

File tree

14 files changed

+324
-125
lines changed

14 files changed

+324
-125
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 44 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ class DataFlowExpr = Expr;
152152
* Flow comes from definitions, uses and refinements.
153153
*/
154154
// TODO: Consider constraining `nodeFrom` and `nodeTo` to be in the same scope.
155+
// If they have different enclosing callables, we get consistency errors.
155156
module EssaFlow {
156157
predicate essaFlowStep(Node nodeFrom, Node nodeTo) {
157158
// Definition
@@ -228,35 +229,60 @@ module EssaFlow {
228229
//--------
229230
/**
230231
* This is the local flow predicate that is used as a building block in global
231-
* data flow. It is a strict subset of the `localFlowStep` predicate, as it
232-
* excludes SSA flow through instance fields.
232+
* data flow.
233+
*
234+
* Local flow can happen either at import time, when the module is initialised
235+
* or at runtime, when callables in the module are called.
233236
*/
234237
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
235-
// If there is ESSA-flow out of a node `node`, we want flow
238+
// If there is local flow out of a node `node`, we want flow
236239
// both out of `node` and any post-update node of `node`.
237240
exists(Node node |
238-
EssaFlow::essaFlowStep(node, nodeTo) and
239241
nodeFrom = update(node) and
240242
(
241-
not node instanceof EssaNode or
242-
not nodeTo instanceof EssaNode or
243-
localEssaStep(node, nodeTo)
243+
importTimeLocalFlowStep(node, nodeTo) or
244+
runtimeLocalFlowStep(node, nodeTo)
244245
)
245246
)
246247
}
247248

248249
/**
249-
* Holds if there is an Essa flow step from `nodeFrom` to `nodeTo` that does not switch between
250-
* local and global SSA variables.
250+
* Holds if `node` is found at the top level of a module.
251251
*/
252-
private predicate localEssaStep(EssaNode nodeFrom, EssaNode nodeTo) {
253-
EssaFlow::essaFlowStep(nodeFrom, nodeTo) and
254-
(
255-
nodeFrom.getVar() instanceof GlobalSsaVariable and
256-
nodeTo.getVar() instanceof GlobalSsaVariable
257-
or
258-
not nodeFrom.getVar() instanceof GlobalSsaVariable and
259-
not nodeTo.getVar() instanceof GlobalSsaVariable
252+
pragma[inline]
253+
predicate isTopLevel(Node node) { node.getScope() instanceof Module }
254+
255+
/** Holds if there is local flow from `nodeFrom` to `nodeTo` at import time. */
256+
predicate importTimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
257+
// As a proxy for whether statements can be executed at import time,
258+
// we check if they appear at the top level.
259+
// This will miss statements inside functions called from the top level.
260+
isTopLevel(nodeFrom) and
261+
isTopLevel(nodeTo) and
262+
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
263+
}
264+
265+
/** Holds if there is local flow from `nodeFrom` to `nodeTo` at runtime. */
266+
predicate runtimeLocalFlowStep(Node nodeFrom, Node nodeTo) {
267+
// Anything not at the top level can be executed at runtime.
268+
not isTopLevel(nodeFrom) and
269+
not isTopLevel(nodeTo) and
270+
EssaFlow::essaFlowStep(nodeFrom, nodeTo)
271+
}
272+
273+
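/** Holds if there is a runtime jump step from `nodeFrom` to `nodeTo` where one end is a `ModuleVariableNode`; `ModuleVariable`s are accessed via such jump steps at runtime. */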
274+
predicate runtimeJumpStep(Node nodeFrom, Node nodeTo) {
275+
// Module variable read
276+
nodeFrom.(ModuleVariableNode).getARead() = nodeTo
277+
or
278+
// Module variable write
279+
nodeFrom = nodeTo.(ModuleVariableNode).getAWrite()
280+
or
281+
// Setting the possible values of the variable at the end of import time
282+
exists(SsaVariable def |
283+
def = any(SsaVariable var).getAnUltimateDefinition() and
284+
def.getDefinition() = nodeFrom.asCfgNode() and
285+
def.getVariable() = nodeTo.(ModuleVariableNode).getVariable()
260286
)
261287
}
262288

@@ -860,11 +886,7 @@ string ppReprType(DataFlowType t) { none() }
860886
* taken into account.
861887
*/
862888
predicate jumpStep(Node nodeFrom, Node nodeTo) {
863-
// Module variable read
864-
nodeFrom.(ModuleVariableNode).getARead() = nodeTo
865-
or
866-
// Module variable write
867-
nodeFrom = nodeTo.(ModuleVariableNode).getAWrite()
889+
runtimeJumpStep(nodeFrom, nodeTo)
868890
or
869891
// Read of module attribute:
870892
exists(AttrRead r, ModuleValue mv |

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ class ModuleVariableNode extends Node, TModuleVariableNode {
332332
override Scope getScope() { result = mod }
333333

334334
override string toString() {
335-
result = "ModuleVariableNode for " + var.toString() + " in " + mod.toString()
335+
result = "ModuleVariableNode for " + mod.getName() + "." + var.getId()
336336
}
337337

338338
/** Gets the module in which this variable appears. */

python/ql/test/experimental/dataflow/fieldflow/globalStep.expected

Lines changed: 104 additions & 86 deletions
Large diffs are not rendered by default.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
foo = 3

python/ql/test/experimental/dataflow/module-initialization/localFlow.expected

Whitespace-only changes.
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// TODO: This query should be made more focused.
2+
import python
3+
import experimental.dataflow.TestUtil.FlowTest
4+
private import semmle.python.dataflow.new.internal.PrintNode
5+
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DP
6+
7+
class ImportTimeLocalFlowTest extends FlowTest {
8+
ImportTimeLocalFlowTest() { this = "ImportTimeLocalFlowTest" }
9+
10+
override string flowTag() { result = "importTimeFlow" }
11+
12+
override predicate relevantFlow(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
13+
nodeFrom.getLocation().getFile().getBaseName() = "multiphase.py" and
14+
// results are displayed next to `nodeTo`, so we need a line to write on
15+
nodeTo.getLocation().getStartLine() > 0 and
16+
nodeTo.asVar() instanceof GlobalSsaVariable and
17+
DP::importTimeLocalFlowStep(nodeFrom, nodeTo)
18+
}
19+
}
20+
21+
class RuntimeLocalFlowTest extends FlowTest {
22+
RuntimeLocalFlowTest() { this = "RuntimeLocalFlowTest" }
23+
24+
override string flowTag() { result = "runtimeFlow" }
25+
26+
override predicate relevantFlow(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
27+
nodeFrom.getLocation().getFile().getBaseName() = "multiphase.py" and
28+
// results are displayed next to `nodeTo`, so we need a line to write on
29+
nodeTo.getLocation().getStartLine() > 0 and
30+
(
31+
nodeFrom instanceof DataFlow::ModuleVariableNode or
32+
nodeTo instanceof DataFlow::ModuleVariableNode
33+
) and
34+
DP::runtimeJumpStep(nodeFrom, nodeTo)
35+
}
36+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# constant
2+
foo = 42
3+
4+
import base
5+
6+
def passOn(x):
7+
return x
8+
9+
# depends on other constant
10+
bar = passOn(base.foo)
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import sys #$ importTimeFlow="ImportExpr -> GSSA Variable sys"
2+
import os #$ importTimeFlow="ImportExpr -> GSSA Variable os"
3+
4+
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
5+
from testlib import *
6+
7+
# These are defined so that we can evaluate the test code.
8+
NONSOURCE = "not a source" #$ importTimeFlow="'not a source' -> GSSA Variable NONSOURCE"
9+
SOURCE = "source" #$ importTimeFlow="'source' -> GSSA Variable SOURCE"
10+
11+
12+
def is_source(x): #$ importTimeFlow="FunctionExpr -> GSSA Variable is_source"
13+
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
14+
15+
16+
def SINK(x): #$ importTimeFlow="FunctionExpr -> GSSA Variable SINK"
17+
if is_source(x): #$ runtimeFlow="ModuleVariableNode for multiphase.is_source, l:-17 -> is_source"
18+
print("OK") #$ runtimeFlow="ModuleVariableNode for multiphase.print, l:-18 -> print"
19+
else:
20+
print("Unexpected flow", x) #$ runtimeFlow="ModuleVariableNode for multiphase.print, l:-20 -> print"
21+
22+
23+
def SINK_F(x): #$ importTimeFlow="FunctionExpr -> GSSA Variable SINK_F"
24+
if is_source(x): #$ runtimeFlow="ModuleVariableNode for multiphase.is_source, l:-24 -> is_source"
25+
print("Unexpected flow", x) #$ runtimeFlow="ModuleVariableNode for multiphase.print, l:-25 -> print"
26+
else:
27+
print("OK") #$ runtimeFlow="ModuleVariableNode for multiphase.print, l:-27 -> print"
28+
29+
def set_foo(): #$ importTimeFlow="FunctionExpr -> GSSA Variable set_foo"
30+
global foo
31+
foo = SOURCE #$ runtimeFlow="ModuleVariableNode for multiphase.SOURCE, l:-31 -> SOURCE" # missing final definition of foo
32+
33+
foo = NONSOURCE #$ importTimeFlow="NONSOURCE -> GSSA Variable foo"
34+
set_foo()
35+
36+
@expects(2)
37+
def test_phases(): #$ importTimeFlow="expects(..)(..), l:-1 -> GSSA Variable test_phases"
38+
global foo
39+
SINK(foo) #$ runtimeFlow="ModuleVariableNode for multiphase.SINK, l:-39 -> SINK" runtimeFlow="ModuleVariableNode for multiphase.foo, l:-39 -> foo"
40+
foo = NONSOURCE #$ runtimeFlow="ModuleVariableNode for multiphase.NONSOURCE, l:-40 -> NONSOURCE"
41+
set_foo() #$ runtimeFlow="ModuleVariableNode for multiphase.set_foo, l:-41 -> set_foo"
42+
SINK(foo) #$ runtimeFlow="ModuleVariableNode for multiphase.SINK, l:-42 -> SINK" runtimeFlow="ModuleVariableNode for multiphase.foo, l:-42 -> foo"
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# These are defined so that we can evaluate the test code.
2+
NONSOURCE = "not a source"
3+
SOURCE = "source"
4+
5+
6+
def is_source(x):
7+
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
8+
9+
10+
def SINK(x):
11+
if is_source(x):
12+
print("OK")
13+
else:
14+
print("Unexpected flow", x)
15+
16+
17+
def SINK_F(x):
18+
if is_source(x):
19+
print("Unexpected flow", x)
20+
else:
21+
print("OK")
22+
23+
import base
24+
25+
base.foo = 42
26+
27+
import m1
28+
29+
def test_const():
30+
SINK(m1.foo)
31+
32+
def test_overwritten():
33+
SINK(m1.bar)
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# These are defined so that we can evaluate the test code.
2+
NONSOURCE = "not a source"
3+
SOURCE = "source"
4+
5+
6+
def is_source(x):
7+
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
8+
9+
10+
def SINK(x):
11+
if is_source(x):
12+
print("OK")
13+
else:
14+
print("Unexpected flow", x)
15+
16+
17+
def SINK_F(x):
18+
if is_source(x):
19+
print("Unexpected flow", x)
20+
else:
21+
print("OK")
22+
23+
import m1
24+
25+
import base
26+
27+
base.foo = 42
28+
29+
def test_const():
30+
SINK(m1.foo)
31+
32+
def test_unoverwritten():
33+
SINK_F(m1.bar)

0 commit comments

Comments
 (0)