Skip to content

Commit 1bba525

Browse files
authored
Merge pull request github#11280 from RasmusWL/dict-dataflow-steps
Python: Support more dictionary read/store steps
2 parents 3d41cd5 + abc1d65 commit 1bba525

File tree

19 files changed

+185
-9
lines changed

19 files changed

+185
-9
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Added more content-flow/field-flow for dictionaries, by adding support for reads through `mydict.get("key")` and `mydict.setdefault("key", value)`, and store steps through `dict["key"] = value` and `mydict.setdefault("key", value)`.

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,8 @@ predicate storeStep(Node nodeFrom, Content c, Node nodeTo) {
588588
or
589589
dictStoreStep(nodeFrom, c, nodeTo)
590590
or
591+
moreDictStoreSteps(nodeFrom, c, nodeTo)
592+
or
591593
comprehensionStoreStep(nodeFrom, c, nodeTo)
592594
or
593595
iterableUnpackingStoreStep(nodeFrom, c, nodeTo)
@@ -688,19 +690,48 @@ predicate tupleStoreStep(CfgNode nodeFrom, TupleElementContent c, CfgNode nodeTo
688690
}
689691

690692
/** Data flows from an element of a dictionary to the dictionary at a specific key. */
691-
predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, CfgNode nodeTo) {
693+
predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
692694
// Dictionary
693695
// `{..., "key" = 42, ...}`
694696
// nodeFrom is `42`, cfg node
695697
// nodeTo is the dict, `{..., "key" = 42, ...}`, cfg node
696698
// c denotes element of dictionary and the key `"key"`
697699
exists(KeyValuePair item |
698-
item = nodeTo.getNode().(DictNode).getNode().(Dict).getAnItem() and
700+
item = nodeTo.asCfgNode().(DictNode).getNode().(Dict).getAnItem() and
699701
nodeFrom.getNode().getNode() = item.getValue() and
700702
c.getKey() = item.getKey().(StrConst).getS()
701703
)
702704
}
703705

706+
/**
707+
* This has been made private since `dictStoreStep` is used by taint-tracking, and
708+
* adding these extra steps made some alerts very noisy.
709+
*
710+
* TODO: Once TaintTracking no longer uses `dictStoreStep`, unify the two predicates.
711+
*/
712+
private predicate moreDictStoreSteps(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
713+
exists(SubscriptNode subscript |
714+
nodeTo.(PostUpdateNode).getPreUpdateNode().asCfgNode() = subscript.getObject() and
715+
nodeFrom.asCfgNode() = subscript.(DefinitionNode).getValue() and
716+
c.getKey() = subscript.getIndex().getNode().(StrConst).getText()
717+
)
718+
or
719+
// see https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
720+
exists(MethodCallNode call |
721+
call.calls(nodeTo.(PostUpdateNode).getPreUpdateNode(), "setdefault") and
722+
call.getArg(0).asExpr().(StrConst).getText() = c.getKey() and
723+
nodeFrom = call.getArg(1)
724+
)
725+
}
726+
727+
predicate dictClearStep(Node node, DictionaryElementContent c) {
728+
exists(SubscriptNode subscript |
729+
subscript instanceof DefinitionNode and
730+
node.asCfgNode() = subscript.getObject() and
731+
c.getKey() = subscript.getIndex().getNode().(StrConst).getText()
732+
)
733+
}
734+
704735
/** Data flows from an element expression in a comprehension to the comprehension. */
705736
predicate comprehensionStoreStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
706737
// Comprehension
@@ -761,6 +792,8 @@ predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
761792
predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
762793
subscriptReadStep(nodeFrom, c, nodeTo)
763794
or
795+
dictReadStep(nodeFrom, c, nodeTo)
796+
or
764797
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
765798
or
766799
matchReadStep(nodeFrom, c, nodeTo)
@@ -799,6 +832,17 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
799832
)
800833
}
801834

835+
predicate dictReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
836+
// see
837+
// - https://docs.python.org/3.10/library/stdtypes.html#dict.get
838+
// - https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
839+
exists(MethodCallNode call |
840+
call.calls(nodeFrom, ["get", "setdefault"]) and
841+
call.getArg(0).asExpr().(StrConst).getText() = c.(DictionaryElementContent).getKey() and
842+
nodeTo = call
843+
)
844+
}
845+
802846
/** Data flows from a sequence to a call to `pop` on the sequence. */
803847
predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
804848
// set.pop or list.pop
@@ -873,6 +917,8 @@ predicate clearsContent(Node n, Content c) {
873917
or
874918
attributeClearStep(n, c)
875919
or
920+
dictClearStep(n, c)
921+
or
876922
FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
877923
or
878924
dictSplatParameterNodeClearStep(n, c)

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3795,6 +3795,30 @@ private module StdlibPrivate {
37953795
preservesValue = true
37963796
}
37973797
}
3798+
3799+
/**
3800+
* A flow summary for `dict.setdefault`.
3801+
*
3802+
* See https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
3803+
*/
3804+
class DictSetdefaultSummary extends SummarizedCallable {
3805+
DictSetdefaultSummary() { this = "dict.setdefault" }
3806+
3807+
override DataFlow::CallCfgNode getACall() {
3808+
result.(DataFlow::MethodCallNode).calls(_, "setdefault")
3809+
}
3810+
3811+
override DataFlow::ArgumentNode getACallback() {
3812+
result.(DataFlow::AttrRead).getAttributeName() = "setdefault"
3813+
}
3814+
3815+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3816+
// store/read steps with dictionary content of this is modeled in DataFlowPrivate
3817+
input = "Argument[1]" and
3818+
output = "ReturnValue" and
3819+
preservesValue = true
3820+
}
3821+
}
37983822
}
37993823

38003824
// ---------------------------------------------------------------------------

python/ql/test/experimental/dataflow/TestUtil/UnresolvedCalls.qll

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,17 @@ class UnresolvedCallExpectations extends InlineExpectationsTest {
99

1010
override string getARelevantTag() { result = "unresolved_call" }
1111

12+
predicate unresolvedCall(CallNode call) {
13+
not exists(DataFlowPrivate::DataFlowCall dfc |
14+
exists(dfc.getCallable()) and dfc.getNode() = call
15+
) and
16+
not DataFlowPrivate::resolveClassCall(call, _) and
17+
not call = API::builtin(_).getACall().asCfgNode()
18+
}
19+
1220
override predicate hasActualResult(Location location, string element, string tag, string value) {
1321
exists(location.getFile().getRelativePath()) and
14-
exists(CallNode call |
15-
not exists(DataFlowPrivate::DataFlowCall dfc |
16-
exists(dfc.getCallable()) and dfc.getNode() = call
17-
) and
18-
not DataFlowPrivate::resolveClassCall(call, _) and
19-
not call = API::builtin(_).getACall().asCfgNode() and
22+
exists(CallNode call | this.unresolvedCall(call) |
2023
location = call.getLocation() and
2124
tag = "unresolved_call" and
2225
value = prettyExpr(call.getNode()) and
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
2+
| file://:0:0:0:0 | parameter position 1 of dict.setdefault |
23
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
34
| test.py:1:19:1:19 | ControlFlowNode for x |
45
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
2+
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
23
| test.py:4:10:4:10 | ControlFlowNode for z |
34
| test.py:7:19:7:19 | ControlFlowNode for a |

python/ql/test/experimental/dataflow/basic/global.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
2+
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
23
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
34
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
45
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |

python/ql/test/experimental/dataflow/basic/globalStep.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
2+
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
23
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
34
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
45
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |

python/ql/test/experimental/dataflow/basic/local.expected

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed |
22
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
33
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
4+
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
45
| file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
56
| file://:0:0:0:0 | parameter position 0 of builtins.reversed | file://:0:0:0:0 | parameter position 0 of builtins.reversed |
7+
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
8+
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | parameter position 1 of dict.setdefault |
69
| test.py:0:0:0:0 | GSSA Variable __name__ | test.py:0:0:0:0 | GSSA Variable __name__ |
710
| test.py:0:0:0:0 | GSSA Variable __package__ | test.py:0:0:0:0 | GSSA Variable __package__ |
811
| test.py:0:0:0:0 | GSSA Variable b | test.py:0:0:0:0 | GSSA Variable b |

python/ql/test/experimental/dataflow/basic/localStep.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
2+
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
23
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
34
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
45
| test.py:1:19:1:19 | ControlFlowNode for x | test.py:1:19:1:19 | SSA variable x |

0 commit comments

Comments
 (0)