Skip to content

Commit f969274

Browse files
authored
Merge pull request #14644 from yoff/python/filter-local-self-loops
Python: filter local self loops
2 parents 8d3ed68 + fd757b0 commit f969274

File tree

12 files changed

+126
-84
lines changed

12 files changed

+126
-84
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 46 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -279,14 +279,16 @@ class NonSyntheticPostUpdateNode extends PostUpdateNodeImpl, CfgNode {
279279
class DataFlowExpr = Expr;
280280

281281
/**
282-
* Flow between ESSA variables.
283-
* This includes both local and global variables.
284-
* Flow comes from definitions, uses and refinements.
282+
* A module to compute local flow.
283+
*
284+
* Flow will generally go from control flow nodes into essa variables at definitions,
285+
* and from there via use-use flow to other control flow nodes.
286+
*
287+
* Some syntaxtic constructs are handled separately.
285288
*/
286-
// TODO: Consider constraining `nodeFrom` and `nodeTo` to be in the same scope.
287-
// If they have different enclosing callables, we get consistency errors.
288-
module EssaFlow {
289-
predicate essaFlowStep(Node nodeFrom, Node nodeTo) {
289+
module LocalFlow {
290+
/** Holds if `nodeFrom` is the control flow node defining the essa variable `nodeTo`. */
291+
predicate definitionFlowStep(Node nodeFrom, Node nodeTo) {
290292
// Definition
291293
// `x = f(42)`
292294
// nodeFrom is `f(42)`, cfg node
@@ -336,24 +338,12 @@ module EssaFlow {
336338
// nodeFrom is `x`, cfgNode
337339
// nodeTo is `x`, essa var
338340
exists(ParameterDefinition pd |
339-
nodeFrom.asCfgNode() = pd.getDefiningNode() and
340-
nodeTo.asVar() = pd.getVariable()
341+
nodeFrom.(CfgNode).getNode() = pd.getDefiningNode() and
342+
nodeTo.(EssaNode).getVar() = pd.getVariable()
341343
)
342-
or
343-
// First use after definition
344-
// `y = 42`
345-
// `x = f(y)`
346-
// nodeFrom is `y` on first line, essa var
347-
// nodeTo is `y` on second line, cfg node
348-
defToFirstUse(nodeFrom.asVar(), nodeTo.asCfgNode())
349-
or
350-
// Next use after use
351-
// `x = f(y)`
352-
// `z = y + 1`
353-
// nodeFrom is 'y' on first line, cfg node
354-
// nodeTo is `y` on second line, cfg node
355-
useToNextUse(nodeFrom.asCfgNode(), nodeTo.asCfgNode())
356-
or
344+
}
345+
346+
predicate expressionFlowStep(Node nodeFrom, Node nodeTo) {
357347
// If expressions
358348
nodeFrom.asCfgNode() = nodeTo.asCfgNode().(IfExprNode).getAnOperand()
359349
or
@@ -366,6 +356,7 @@ module EssaFlow {
366356
// Flow inside an unpacking assignment
367357
iterableUnpackingFlowStep(nodeFrom, nodeTo)
368358
or
359+
// Flow inside a match statement
369360
matchFlowStep(nodeFrom, nodeTo)
370361
}
371362

@@ -376,6 +367,35 @@ module EssaFlow {
376367
predicate defToFirstUse(EssaVariable var, NameNode nodeTo) {
377368
AdjacentUses::firstUse(var.getDefinition(), nodeTo)
378369
}
370+
371+
predicate useUseFlowStep(Node nodeFrom, Node nodeTo) {
372+
// First use after definition
373+
// `y = 42`
374+
// `x = f(y)`
375+
// nodeFrom is `y` on first line, essa var
376+
// nodeTo is `y` on second line, cfg node
377+
defToFirstUse(nodeFrom.asVar(), nodeTo.asCfgNode())
378+
or
379+
// Next use after use
380+
// `x = f(y)`
381+
// `z = y + 1`
382+
// nodeFrom is 'y' on first line, cfg node
383+
// nodeTo is `y` on second line, cfg node
384+
useToNextUse(nodeFrom.asCfgNode(), nodeTo.asCfgNode())
385+
}
386+
387+
predicate localFlowStep(Node nodeFrom, Node nodeTo) {
388+
IncludePostUpdateFlow<PhaseDependentFlow<definitionFlowStep/2>::step/2>::step(nodeFrom, nodeTo)
389+
or
390+
IncludePostUpdateFlow<PhaseDependentFlow<expressionFlowStep/2>::step/2>::step(nodeFrom, nodeTo)
391+
or
392+
// Blindly applying use-use flow can result in a node that steps to itself, for
393+
// example in while-loops. To uphold dataflow consistency checks, we don't want
394+
// that. However, we do want to allow `[post] n` to `n` (to handle while loops), so
395+
// we should only do the filtering after `IncludePostUpdateFlow` has ben applied.
396+
IncludePostUpdateFlow<PhaseDependentFlow<useUseFlowStep/2>::step/2>::step(nodeFrom, nodeTo) and
397+
nodeFrom != nodeTo
398+
}
379399
}
380400

381401
//--------
@@ -481,7 +501,8 @@ predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
481501
* or at runtime when callables in the module are called.
482502
*/
483503
predicate simpleLocalFlowStepForTypetracking(Node nodeFrom, Node nodeTo) {
484-
IncludePostUpdateFlow<PhaseDependentFlow<EssaFlow::essaFlowStep/2>::step/2>::step(nodeFrom, nodeTo)
504+
IncludePostUpdateFlow<PhaseDependentFlow<LocalFlow::localFlowStep/2>::step/2>::step(nodeFrom,
505+
nodeTo)
485506
}
486507

487508
private predicate summaryLocalStep(Node nodeFrom, Node nodeTo) {

python/ql/lib/semmle/python/dataflow/new/internal/ImportResolution.qll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,13 @@ module ImportResolution {
111111
allowedEssaImportStep*(firstDef, lastUseVar) and
112112
not allowedEssaImportStep(_, firstDef)
113113
|
114-
not EssaFlow::defToFirstUse(firstDef, _) and
114+
not LocalFlow::defToFirstUse(firstDef, _) and
115115
val.asVar() = firstDef
116116
or
117117
exists(ControlFlowNode mid, ControlFlowNode end |
118-
EssaFlow::defToFirstUse(firstDef, mid) and
119-
EssaFlow::useToNextUse*(mid, end) and
120-
not EssaFlow::useToNextUse(end, _) and
118+
LocalFlow::defToFirstUse(firstDef, mid) and
119+
LocalFlow::useToNextUse*(mid, end) and
120+
not LocalFlow::useToNextUse(end, _) and
121121
val.asCfgNode() = end
122122
)
123123
)

python/ql/test/experimental/dataflow/coverage/dataflow-consistency.expected

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,18 +25,5 @@ uniqueParameterNodeAtPosition
2525
uniqueParameterNodePosition
2626
uniqueContentApprox
2727
identityLocalStep
28-
| datamodel.py:84:15:84:15 | ControlFlowNode for x | Node steps to itself |
29-
| datamodel.py:166:11:166:11 | ControlFlowNode for x | Node steps to itself |
30-
| test.py:103:10:103:15 | ControlFlowNode for SOURCE | Node steps to itself |
31-
| test.py:130:10:130:15 | ControlFlowNode for SOURCE | Node steps to itself |
32-
| test.py:162:13:162:18 | ControlFlowNode for SOURCE | Node steps to itself |
33-
| test.py:167:13:167:18 | ControlFlowNode for SOURCE | Node steps to itself |
34-
| test.py:216:10:216:15 | ControlFlowNode for SOURCE | Node steps to itself |
35-
| test.py:242:9:242:12 | ControlFlowNode for SINK | Node steps to itself |
36-
| test.py:673:9:673:12 | ControlFlowNode for SINK | Node steps to itself |
37-
| test.py:674:9:674:14 | ControlFlowNode for SINK_F | Node steps to itself |
38-
| test.py:682:9:682:12 | ControlFlowNode for SINK | Node steps to itself |
39-
| test.py:690:9:690:12 | ControlFlowNode for SINK | Node steps to itself |
40-
| test.py:696:5:696:8 | ControlFlowNode for SINK | Node steps to itself |
4128
missingArgumentCall
4229
multipleArgumentCall
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
2+
# This can be checked by running validTest.py.
3+
4+
import sys
5+
import os
6+
7+
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
8+
from testlib import expects
9+
10+
# These are defined so that we can evaluate the test code.
11+
NONSOURCE = "not a source"
12+
SOURCE = "source"
13+
14+
15+
def is_source(x):
16+
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
17+
18+
19+
def SINK(x):
20+
if is_source(x):
21+
print("OK")
22+
else:
23+
print("Unexpected flow", x)
24+
25+
26+
def SINK_F(x):
27+
if is_source(x):
28+
print("Unexpected flow", x)
29+
else:
30+
print("OK")
31+
32+
# ------------------------------------------------------------------------------
33+
# Actual tests
34+
# ------------------------------------------------------------------------------
35+
36+
def test_while():
37+
x = NONSOURCE
38+
n = 2
39+
while n > 0:
40+
if n == 1:
41+
SINK(x) #$ flow="SOURCE, l:+1 -> x"
42+
x = SOURCE
43+
n -= 1
44+
45+
class MyObj(object):
46+
def __init__(self, foo):
47+
self.foo = foo
48+
49+
def setFoo(obj, x):
50+
obj.foo = x
51+
52+
def test_while_obj():
53+
myobj = MyObj(NONSOURCE)
54+
n = 2
55+
while n > 0:
56+
if n == 1:
57+
SINK(myobj.foo) #$ flow="SOURCE, l:+1 -> myobj.foo"
58+
setFoo(myobj, SOURCE)
59+
n -= 1
60+
61+
def setAndTestFoo(obj, x, test):
62+
if test:
63+
# This flow is not found, if self-loops are broken at the SSA level.
64+
SINK(obj.foo) #$ flow="SOURCE, l:+7 -> obj.foo"
65+
obj.foo = x
66+
67+
def test_while_obj_sink():
68+
myobj = MyObj(NONSOURCE)
69+
n = 2
70+
while n > 0:
71+
setAndTestFoo(myobj, SOURCE, n == 1)
72+
n -= 1
73+

python/ql/test/experimental/dataflow/module-initialization/localFlow.ql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ module ImportTimeLocalFlowTest implements FlowTestSig {
1212
// results are displayed next to `nodeTo`, so we need a line to write on
1313
nodeTo.getLocation().getStartLine() > 0 and
1414
nodeTo.asVar() instanceof GlobalSsaVariable and
15-
DP::PhaseDependentFlow<DP::EssaFlow::essaFlowStep/2>::importTimeStep(nodeFrom, nodeTo)
15+
DP::PhaseDependentFlow<DP::LocalFlow::localFlowStep/2>::importTimeStep(nodeFrom, nodeTo)
1616
}
1717
}
1818

python/ql/test/experimental/dataflow/strange-essaflow/testFlow.ql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,5 @@ query predicate jumpStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
3333

3434
query predicate essaFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
3535
os_import(nodeFrom) and
36-
DataFlowPrivate::EssaFlow::essaFlowStep(nodeFrom, nodeTo)
36+
DataFlowPrivate::LocalFlow::localFlowStep(nodeFrom, nodeTo)
3737
}

python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep-py3/dataflow-consistency.expected

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,5 @@ uniqueParameterNodeAtPosition
2323
uniqueParameterNodePosition
2424
uniqueContentApprox
2525
identityLocalStep
26-
| test_collections.py:20:9:20:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
27-
| test_unpacking.py:31:9:31:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
2826
missingArgumentCall
2927
multipleArgumentCall

python/ql/test/experimental/dataflow/tainttracking/defaultAdditionalTaintStep/dataflow-consistency.expected

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,5 @@ uniqueParameterNodeAtPosition
2323
uniqueParameterNodePosition
2424
uniqueContentApprox
2525
identityLocalStep
26-
| test_async.py:48:9:48:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
27-
| test_collections.py:64:10:64:21 | ControlFlowNode for tainted_list | Node steps to itself |
28-
| test_collections.py:71:9:71:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
29-
| test_collections.py:73:9:73:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
30-
| test_collections.py:88:10:88:21 | ControlFlowNode for tainted_list | Node steps to itself |
31-
| test_collections.py:89:10:89:23 | ControlFlowNode for TAINTED_STRING | Node steps to itself |
32-
| test_collections.py:97:9:97:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
33-
| test_collections.py:99:9:99:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
34-
| test_collections.py:112:9:112:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
35-
| test_collections.py:114:9:114:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
36-
| test_collections.py:147:9:147:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
37-
| test_collections.py:149:9:149:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
38-
| test_collections.py:246:9:246:15 | ControlFlowNode for my_dict | Node steps to itself |
39-
| test_collections.py:246:22:246:33 | ControlFlowNode for tainted_dict | Node steps to itself |
40-
| test_for.py:24:9:24:22 | ControlFlowNode for ensure_tainted | Node steps to itself |
4126
missingArgumentCall
4227
multipleArgumentCall

python/ql/test/experimental/dataflow/validTest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def check_tests_valid_after_version(testFile, version):
6565
check_tests_valid("coverage.argumentPassing")
6666
check_tests_valid("coverage.datamodel")
6767
check_tests_valid("coverage.test_builtins")
68+
check_tests_valid("coverage.loops")
6869
check_tests_valid("coverage-py2.classes")
6970
check_tests_valid("coverage-py3.classes")
7071
check_tests_valid("variable-capture.in")

python/ql/test/experimental/dataflow/variable-capture/dataflow-consistency.expected

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,5 @@ uniqueParameterNodeAtPosition
2929
uniqueParameterNodePosition
3030
uniqueContentApprox
3131
identityLocalStep
32-
| test_collections.py:36:10:36:15 | ControlFlowNode for SOURCE | Node steps to itself |
33-
| test_collections.py:45:19:45:21 | ControlFlowNode for mod | Node steps to itself |
34-
| test_collections.py:52:13:52:21 | ControlFlowNode for mod_local | Node steps to itself |
3532
missingArgumentCall
3633
multipleArgumentCall

0 commit comments

Comments
 (0)