Skip to content

Commit 00c0ebe

Browse files
authored
Merge pull request github#13738 from RasmusWL/path-steps
Python: Include all assignments in data flow paths
2 parents 2b718fb + 843f268 commit 00c0ebe

File tree

49 files changed

+1346
-911
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+1346
-911
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -513,15 +513,21 @@ class CastNode extends Node {
513513
* explanations.
514514
*/
515515
predicate neverSkipInPathGraph(Node n) {
516-
// We include read- and store steps here to force them to be
517-
// shown in path explanations.
518-
// This hack is necessary, because we have included some of these
519-
// steps as default taint steps, making them be suppressed in path
520-
// explanations.
521-
// We should revert this once, we can remove this steps from the
522-
// default taint steps; this should be possible once we have
523-
// implemented flow summaries and recursive content.
524-
readStep(_, _, n) or storeStep(_, _, n)
516+
// NOTE: We could use RHS of a definition, but since we have use-use flow, in an
517+
// example like
518+
// ```py
519+
// x = SOURCE()
520+
// if <cond>:
521+
// y = x
522+
// SINK(x)
523+
// ```
524+
// we would end up saying that the path MUST not skip the x in `y = x`, which is just
525+
// annoying and doesn't help the path explanation become clearer.
526+
n.asVar() instanceof EssaDefinition and
527+
// For a parameter we have flow from ControlFlowNode to SSA node, and then onwards
528+
// with use-use flow, and since the CFN is already part of the path graph, we don't
529+
// want to force showing the SSA node as well.
530+
not n.asVar() instanceof ParameterDefinition
525531
}
526532

527533
/**
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Updated path explanations for `@kind path-problem` queries to always include left hand side of assignments, making paths easier to understand.

python/ql/test/experimental/dataflow/basic/globalStep.expected

Lines changed: 0 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
22
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
3-
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
43
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
54
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | SSA variable x |
65
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | SSA variable x |
@@ -14,18 +13,6 @@
1413
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x |
1514
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x |
1615
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:2:7:2:7 | ControlFlowNode for x |
17-
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
18-
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
19-
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
20-
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
21-
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
22-
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
23-
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
24-
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
25-
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
26-
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
27-
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
28-
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
2916
| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
3017
| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
3118
| test.py:1:19:1:19 | SSA variable x | test.py:2:3:2:3 | SSA variable y |
@@ -34,18 +21,6 @@
3421
| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
3522
| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
3623
| test.py:1:19:1:19 | SSA variable x | test.py:2:7:2:7 | ControlFlowNode for x |
37-
| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z |
38-
| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z |
39-
| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z |
40-
| test.py:1:19:1:19 | SSA variable x | test.py:3:3:3:3 | SSA variable z |
41-
| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y |
42-
| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y |
43-
| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y |
44-
| test.py:1:19:1:19 | SSA variable x | test.py:3:7:3:7 | ControlFlowNode for y |
45-
| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
46-
| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
47-
| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
48-
| test.py:1:19:1:19 | SSA variable x | test.py:4:10:4:10 | ControlFlowNode for z |
4924
| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z |
5025
| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z |
5126
| test.py:2:3:2:3 | SSA variable y | test.py:3:3:3:3 | SSA variable z |
@@ -54,26 +29,10 @@
5429
| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
5530
| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
5631
| test.py:2:3:2:3 | SSA variable y | test.py:3:7:3:7 | ControlFlowNode for y |
57-
| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
58-
| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
59-
| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
60-
| test.py:2:3:2:3 | SSA variable y | test.py:4:10:4:10 | ControlFlowNode for z |
6132
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
6233
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
6334
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
6435
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:2:3:2:3 | SSA variable y |
65-
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
66-
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
67-
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
68-
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:3:3:3 | SSA variable z |
69-
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
70-
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
71-
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
72-
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:3:7:3:7 | ControlFlowNode for y |
73-
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
74-
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
75-
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
76-
| test.py:2:7:2:7 | ControlFlowNode for x | test.py:4:10:4:10 | ControlFlowNode for z |
7736
| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
7837
| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
7938
| test.py:3:3:3:3 | SSA variable z | test.py:4:10:4:10 | ControlFlowNode for z |
@@ -82,18 +41,12 @@
8241
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
8342
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
8443
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:3:3:3:3 | SSA variable z |
85-
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
86-
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
87-
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
88-
| test.py:3:7:3:7 | ControlFlowNode for y | test.py:4:10:4:10 | ControlFlowNode for z |
8944
| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
9045
| test.py:4:10:4:10 | ControlFlowNode for z | test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
9146
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:19:7:19 | ControlFlowNode for a |
9247
| test.py:6:1:6:1 | GSSA Variable a | test.py:7:19:7:19 | ControlFlowNode for a |
9348
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | GSSA Variable a |
9449
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:6:1:6:1 | GSSA Variable a |
95-
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a |
96-
| test.py:6:5:6:6 | ControlFlowNode for IntegerLiteral | test.py:7:19:7:19 | ControlFlowNode for a |
9750
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() | test.py:7:1:7:1 | GSSA Variable b |
9851
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x |
9952
| test.py:7:19:7:19 | ControlFlowNode for a | test.py:1:19:1:19 | ControlFlowNode for x |
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
failures
2+
testFailures
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/**
2+
* @kind path-problem
3+
*/
4+
5+
import python
6+
import semmle.python.dataflow.new.DataFlow
7+
import semmle.python.dataflow.new.TaintTracking
8+
import experimental.dataflow.testConfig
9+
import TestUtilities.InlineExpectationsTest
10+
11+
module TestTaintFlow = TaintTracking::Global<TestConfig>;
12+
13+
module PathNodeTest implements TestSig {
14+
string getARelevantTag() { result = "path-node" }
15+
16+
predicate hasActualResult(Location location, string element, string tag, string value) {
17+
exists(TestTaintFlow::PathNode pn |
18+
location = pn.getNode().getLocation() and
19+
tag = "path-node" and
20+
value = "" and
21+
element = pn.toString()
22+
)
23+
}
24+
}
25+
26+
import MakeTest<PathNodeTest>
27+
// running the query to inspect the results can be quite nice!
28+
// just uncomment these lines!
29+
// import TestTaintFlow::PathGraph
30+
// from TestTaintFlow::PathNode source, TestTaintFlow::PathNode sink
31+
// where TestTaintFlow::flowPath(source, sink)
32+
// select sink.getNode(), source, sink,
33+
// sink.getNode().getEnclosingCallable().toString() + ": --> " +
34+
// sink.getNode().getLocation().getStartLine().toString()
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
def assign():
2+
x = SOURCE # $ path-node
3+
4+
y = x # $ path-node
5+
6+
SINK(y) # $ path-node
7+
8+
9+
def aug_assign():
10+
x = SOURCE # $ path-node
11+
z = ""
12+
13+
z += x # $ path-node
14+
15+
SINK(z) # $ path-node
16+
17+
18+
def dont_use_rhs(cond):
19+
# like noted in the original Ruby PR: https://github.com/github/codeql/pull/12566
20+
x = SOURCE # $ path-node
21+
22+
if cond:
23+
y = x
24+
25+
SINK(x) # $ path-node
26+
27+
28+
def flow_through_function():
29+
def identify(x): # $ path-node
30+
return x # $ path-node
31+
32+
x = SOURCE # $ path-node
33+
34+
y = identify(x) # $ path-node
35+
36+
SINK(y) # $ path-node
37+
38+
39+
def attribute():
40+
class X: pass
41+
x = X()
42+
x.attr = SOURCE # $ path-node
43+
44+
y = x # $ path-node
45+
46+
SINK(y.attr) # $ path-node
47+
48+
49+
def list_loop():
50+
x = SOURCE # $ path-node
51+
l = list()
52+
53+
l.append(x) # $ path-node
54+
55+
for y in l: # $ path-node
56+
57+
SINK(y) # $ path-node
58+
59+
60+
def list_index():
61+
x = SOURCE # $ path-node
62+
l = list()
63+
64+
l.append(x) # $ path-node
65+
66+
z = l[0] # $ path-node
67+
68+
SINK(z) # $ path-node
69+
70+
71+
def test_tuple():
72+
x = SOURCE # $ path-node
73+
74+
y = ((x, 1), 2) # $ path-node
75+
76+
(z, _), _ = y # $ path-node
77+
78+
SINK(z) # $ path-node
79+
80+
81+
def test_with():
82+
x = SOURCE # $ path-node
83+
84+
with x as y: # $ path-node
85+
86+
SINK(y) # $ path-node
87+
88+
89+
def test_match():
90+
x = SOURCE # $ path-node
91+
92+
match x:
93+
94+
case y: # $ path-node
95+
96+
SINK(y) # $ path-node

0 commit comments

Comments
 (0)