Skip to content

Commit d953382

Browse files
authored
Merge pull request github#7807 from RasmusWL/dataflow-improvements
Python: Dataflow improvements
2 parents 5cba505 + 0e0f159 commit d953382

File tree

136 files changed

+1367
-2951
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

136 files changed

+1367
-2951
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Improved analysis of attributes for data-flow and taint tracking queries, so `getattr`/`setattr` are supported, and a write to an attribute properly stops flow for the old value in that attribute.
5+
* Added post-update nodes (`DataFlow::PostUpdateNode`) for arguments in calls that can't be resolved.

python/ql/lib/semmle/python/dataflow/new/internal/Attributes.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,8 @@ abstract class AttrRead extends AttrRef, Node, LocalSourceNode { }
204204
private class AttributeReadAsAttrRead extends AttrRead, CfgNode {
205205
override AttrNode node;
206206

207+
AttributeReadAsAttrRead() { node.isLoad() }
208+
207209
override Node getObject() { result.asCfgNode() = node.getObject() }
208210

209211
override ExprNode getAttributeNameExpr() {

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ module syntheticPostUpdateNode {
126126
* Certain arguments, such as implicit self arguments are already post-update nodes
127127
* and should not have an extra node synthesised.
128128
*/
129-
ArgumentNode argumentPreUpdateNode() {
129+
Node argumentPreUpdateNode() {
130130
result = any(FunctionCall c).getArg(_)
131131
or
132132
// Avoid argument 0 of method calls as those have read post-update nodes.
@@ -136,6 +136,11 @@ module syntheticPostUpdateNode {
136136
or
137137
// Avoid argument 0 of class calls as those have non-synthetic post-update nodes.
138138
exists(ClassCall c, int n | n > 0 | result = c.getArg(n))
139+
or
140+
// any argument of any call that we have not been able to resolve
141+
exists(CallNode call | not call = any(DataFlowCall c).getNode() |
142+
result.(CfgNode).getNode() in [call.getArg(_), call.getArgByName(_)]
143+
)
139144
}
140145

141146
/** An object might have its value changed after a store. */
@@ -704,7 +709,7 @@ newtype TDataFlowCall =
704709
TFunctionCall(CallNode call) { call = any(FunctionValue f).getAFunctionCall() } or
705710
/** Bound methods need to make room for the explicit self parameter */
706711
TMethodCall(CallNode call) { call = any(FunctionValue f).getAMethodCall() } or
707-
TClassCall(CallNode call) { call = any(ClassValue c).getACall() } or
712+
TClassCall(CallNode call) { call = any(ClassValue c | not c.isAbsent()).getACall() } or
708713
TSpecialCall(SpecialMethodCallNode special)
709714

710715
/** Represents a call. */
@@ -1067,19 +1072,18 @@ predicate comprehensionStoreStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
10671072
}
10681073

10691074
/**
1070-
* Holds if `nodeFrom` flows into an attribute (corresponding to `c`) of `nodeTo` via an attribute assignment.
1075+
* Holds if `nodeFrom` flows into the attribute `c` of `nodeTo` via an attribute assignment.
10711076
*
10721077
* For example, in
10731078
* ```python
10741079
* obj.foo = x
10751080
* ```
1076-
* data flows from `x` to (the post-update node for) `obj` via assignment to `foo`.
1081+
* data flows from `x` to the attribute `foo` of (the post-update node for) `obj`.
10771082
*/
1078-
predicate attributeStoreStep(CfgNode nodeFrom, AttributeContent c, PostUpdateNode nodeTo) {
1079-
exists(AttrNode attr |
1080-
nodeFrom.asCfgNode() = attr.(DefinitionNode).getValue() and
1081-
attr.getName() = c.getAttribute() and
1082-
attr.getObject() = nodeTo.getPreUpdateNode().(CfgNode).getNode()
1083+
predicate attributeStoreStep(Node nodeFrom, AttributeContent c, PostUpdateNode nodeTo) {
1084+
exists(AttrWrite write |
1085+
write.accesses(nodeTo.getPreUpdateNode(), c.getAttribute()) and
1086+
nodeFrom = write.getValue()
10831087
)
10841088
}
10851089

@@ -1923,21 +1927,16 @@ pragma[noinline]
19231927
TupleElementContent small_tuple() { result.getIndex() <= 7 }
19241928

19251929
/**
1926-
* Holds if `nodeTo` is a read of an attribute (corresponding to `c`) of the object in `nodeFrom`.
1930+
* Holds if `nodeTo` is a read of the attribute `c` of the object `nodeFrom`.
19271931
*
1928-
* For example, in
1932+
* For example
19291933
* ```python
19301934
* obj.foo
19311935
* ```
1932-
* data flows from `obj` to `obj.foo` via a read from `foo`.
1936+
* is a read of the attribute `foo` from the object `obj`.
19331937
*/
1934-
predicate attributeReadStep(CfgNode nodeFrom, AttributeContent c, CfgNode nodeTo) {
1935-
exists(AttrNode attr |
1936-
nodeFrom.asCfgNode() = attr.getObject() and
1937-
nodeTo.asCfgNode() = attr and
1938-
attr.getName() = c.getAttribute() and
1939-
attr.isLoad()
1940-
)
1938+
predicate attributeReadStep(Node nodeFrom, AttributeContent c, AttrRead nodeTo) {
1939+
nodeTo.accesses(nodeFrom, c.getAttribute())
19411940
}
19421941

19431942
/**
@@ -1973,6 +1972,18 @@ predicate clearsContent(Node n, Content c) {
19731972
kwOverflowClearStep(n, c)
19741973
or
19751974
matchClearStep(n, c)
1975+
or
1976+
attributeClearStep(n, c)
1977+
}
1978+
1979+
/**
1980+
* Holds if values stored inside attribute `c` are cleared at node `n`.
1981+
*
1982+
* In `obj.foo = x` any old value stored in `foo` is cleared at the pre-update node
1983+
* associated with `obj`
1984+
*/
1985+
predicate attributeClearStep(Node n, AttributeContent c) {
1986+
exists(PostUpdateNode post | post.getPreUpdateNode() = n | attributeStoreStep(_, c, post))
19761987
}
19771988

19781989
//--------

python/ql/lib/semmle/python/dataflow/new/internal/PrintNode.qll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
private import python
1111
private import semmle.python.dataflow.new.DataFlow
12+
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
1213

1314
/**
1415
* INTERNAL: Do not use.
@@ -66,7 +67,12 @@ string prettyNodeForInlineTest(DataFlow::Node node) {
6667
result = "[post]" + prettyExpr(e)
6768
)
6869
or
70+
exists(Expr e | e = node.(DataFlowPrivate::SyntheticPreUpdateNode).getPostUpdateNode().asExpr() |
71+
result = "[pre]" + prettyExpr(e)
72+
)
73+
or
6974
not exists(node.asExpr()) and
7075
not exists(node.(DataFlow::PostUpdateNode).getPreUpdateNode().asExpr()) and
76+
not exists(node.(DataFlowPrivate::SyntheticPreUpdateNode).getPostUpdateNode().asExpr()) and
7177
result = node.toString()
7278
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
| test.py:10:1:10:9 | ControlFlowNode for Attribute | test.py:10:1:10:5 | ControlFlowNode for myobj | foo |
2+
| test.py:13:1:13:21 | ControlFlowNode for getattr() | test.py:13:9:13:13 | ControlFlowNode for myobj | foo |
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import python
2+
private import semmle.python.dataflow.new.DataFlow
3+
4+
from DataFlow::AttrRead read
5+
select read, read.getObject(), read.getAttributeName()
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
| test.py:5:9:5:16 | ControlFlowNode for __init__ | test.py:4:1:4:20 | ControlFlowNode for ClassExpr | __init__ | test.py:5:5:5:28 | ControlFlowNode for FunctionExpr |
2+
| test.py:6:9:6:16 | ControlFlowNode for Attribute | test.py:6:9:6:12 | ControlFlowNode for self | foo | test.py:6:20:6:22 | ControlFlowNode for foo |
3+
| test.py:9:1:9:9 | ControlFlowNode for Attribute | test.py:9:1:9:5 | ControlFlowNode for myobj | foo | test.py:9:13:9:17 | ControlFlowNode for Str |
4+
| test.py:12:1:12:25 | ControlFlowNode for setattr() | test.py:12:9:12:13 | ControlFlowNode for myobj | foo | test.py:12:23:12:24 | ControlFlowNode for IntegerLiteral |
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import python
2+
private import semmle.python.dataflow.new.DataFlow
3+
4+
from DataFlow::AttrWrite write
5+
select write, write.getObject(), write.getAttributeName(), write.getValue()
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# This file is a simple test of which nodes are included with AttrRead/AttrWrite.
2+
# For actual data-flow tests, see fieldflow/ dir.
3+
4+
class MyObj(object):
5+
def __init__(self, foo):
6+
self.foo = foo
7+
8+
myobj = MyObj("foo")
9+
myobj.foo = "bar"
10+
myobj.foo
11+
12+
setattr(myobj, "foo", 42)
13+
getattr(myobj, "foo")
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import python
2+
import experimental.dataflow.TestUtil.FlowTest
3+
import experimental.dataflow.testConfig
4+
private import semmle.python.dataflow.new.internal.PrintNode
5+
6+
class DataFlowTest extends FlowTest {
7+
DataFlowTest() { this = "DataFlowTest" }
8+
9+
override string flowTag() { result = "flow" }
10+
11+
override predicate relevantFlow(DataFlow::Node source, DataFlow::Node sink) {
12+
exists(TestConfiguration cfg | cfg.hasFlow(source, sink))
13+
}
14+
}
15+
16+
query predicate missingAnnotationOnSINK(Location location, string error, string element) {
17+
error = "ERROR, you should add `# $ MISSING: flow` annotation" and
18+
exists(DataFlow::Node sink |
19+
exists(DataFlow::CallCfgNode call |
20+
// note: we only care about `SINK` and not `SINK_F`, so we have to reconstruct manually.
21+
call.getFunction().asCfgNode().(NameNode).getId() = "SINK" and
22+
(sink = call.getArg(_) or sink = call.getArgByName(_))
23+
) and
24+
location = sink.getLocation() and
25+
element = prettyExpr(sink.asExpr()) and
26+
not any(TestConfiguration config).hasFlow(_, sink) and
27+
not exists(FalseNegativeExpectation missingResult |
28+
missingResult.getTag() = "flow" and
29+
missingResult.getLocation().getFile() = location.getFile() and
30+
missingResult.getLocation().getStartLine() = location.getStartLine()
31+
)
32+
)
33+
}

0 commit comments

Comments
 (0)