Skip to content

Commit b568695

Browse files
committed
Python: Support more dictionary read/store steps
The `setdefault` behavior is kinda strange, but no reason not to support it.
1 parent 6e31f64 commit b568695

File tree

3 files changed

+49
-9
lines changed

3 files changed

+49
-9
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Added more content-flow/field-flow for dictionaries, by adding support for reads through `mydict.get("key")` and `mydict.setdefault("key", value)`, and store steps through `dict["key"] = value` and `mydict.setdefault("key", value)`.

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -688,17 +688,38 @@ predicate tupleStoreStep(CfgNode nodeFrom, TupleElementContent c, CfgNode nodeTo
688688
}
689689

690690
/** Data flows from an element of a dictionary to the dictionary at a specific key. */
691-
predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, CfgNode nodeTo) {
691+
predicate dictStoreStep(CfgNode nodeFrom, DictionaryElementContent c, Node nodeTo) {
692692
// Dictionary
693693
// `{..., "key" = 42, ...}`
694694
// nodeFrom is `42`, cfg node
695695
// nodeTo is the dict, `{..., "key" = 42, ...}`, cfg node
696696
// c denotes element of dictionary and the key `"key"`
697697
exists(KeyValuePair item |
698-
item = nodeTo.getNode().(DictNode).getNode().(Dict).getAnItem() and
698+
item = nodeTo.asCfgNode().(DictNode).getNode().(Dict).getAnItem() and
699699
nodeFrom.getNode().getNode() = item.getValue() and
700700
c.getKey() = item.getKey().(StrConst).getS()
701701
)
702+
or
703+
exists(SubscriptNode subscript |
704+
nodeTo.(PostUpdateNode).getPreUpdateNode().asCfgNode() = subscript.getObject() and
705+
nodeFrom.asCfgNode() = subscript.(DefinitionNode).getValue() and
706+
c.getKey() = subscript.getIndex().getNode().(StrConst).getText()
707+
)
708+
or
709+
// see https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
710+
exists(MethodCallNode call |
711+
call.calls(nodeTo.(PostUpdateNode).getPreUpdateNode(), ["setdefault"]) and
712+
call.getArg(0).asExpr().(StrConst).getText() = c.(DictionaryElementContent).getKey() and
713+
nodeFrom = call.getArg(1)
714+
)
715+
}
716+
717+
predicate dictClearStep(Node node, DictionaryElementContent c) {
718+
exists(SubscriptNode subscript |
719+
subscript instanceof DefinitionNode and
720+
node.asCfgNode() = subscript.getObject() and
721+
c.getKey() = subscript.getIndex().getNode().(StrConst).getText()
722+
)
702723
}
703724

704725
/** Data flows from an element expression in a comprehension to the comprehension. */
@@ -761,6 +782,8 @@ predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
761782
predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
762783
subscriptReadStep(nodeFrom, c, nodeTo)
763784
or
785+
dictReadStep(nodeFrom, c, nodeTo)
786+
or
764787
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
765788
or
766789
matchReadStep(nodeFrom, c, nodeTo)
@@ -799,6 +822,17 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
799822
)
800823
}
801824

825+
predicate dictReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
826+
// see
827+
// - https://docs.python.org/3.10/library/stdtypes.html#dict.get
828+
// - https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
829+
exists(MethodCallNode call |
830+
call.calls(nodeFrom, ["get", "setdefault"]) and
831+
call.getArg(0).asExpr().(StrConst).getText() = c.(DictionaryElementContent).getKey() and
832+
nodeTo = call
833+
)
834+
}
835+
802836
/** Data flows from a sequence to a call to `pop` on the sequence. */
803837
predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
804838
// set.pop or list.pop
@@ -873,6 +907,8 @@ predicate clearsContent(Node n, Content c) {
873907
or
874908
attributeClearStep(n, c)
875909
or
910+
dictClearStep(n, c)
911+
or
876912
FlowSummaryImpl::Private::Steps::summaryClearsContent(n, c)
877913
or
878914
dictSplatParameterNodeClearStep(n, c)

python/ql/test/experimental/dataflow/fieldflow/test_dict.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,24 +35,24 @@ def SINK_F(x):
3535
def test_dict_literal():
3636
d = {"key": SOURCE}
3737
SINK(d["key"]) # $ flow="SOURCE, l:-1 -> d['key']"
38-
SINK(d.get("key")) # $ MISSING: flow
39-
SINK(d.setdefault("key", NONSOURCE)) # $ MISSING: flow
38+
SINK(d.get("key")) # $ flow="SOURCE, l:-2 -> d.get(..)"
39+
SINK(d.setdefault("key", NONSOURCE)) # $ flow="SOURCE, l:-3 -> d.setdefault(..)"
4040

4141

4242
@expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
4343
def test_dict_update():
4444
d = {}
4545
d["key"] = SOURCE
46-
SINK(d["key"]) # $ MISSING: flow
47-
SINK(d.get("key")) # $ MISSING: flow
48-
SINK(d.setdefault("key", NONSOURCE)) # $ MISSING: flow
46+
SINK(d["key"]) # $ flow="SOURCE, l:-1 -> d['key']"
47+
SINK(d.get("key")) # $ flow="SOURCE, l:-2 -> d.get(..)"
48+
SINK(d.setdefault("key", NONSOURCE)) # $ flow="SOURCE, l:-3 -> d.setdefault(..)"
4949

5050

5151
@expects(2) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)
5252
def test_dict_override():
5353
d = {}
5454
d["key"] = SOURCE
55-
SINK(d["key"]) # $ MISSING: flow
55+
SINK(d["key"]) # $ flow="SOURCE, l:-1 -> d['key']"
5656

5757
d["key"] = NONSOURCE
5858
SINK_F(d["key"])
@@ -61,7 +61,7 @@ def test_dict_override():
6161
def test_dict_setdefault():
6262
d = {}
6363
d.setdefault("key", SOURCE)
64-
SINK(d["key"]) # $ MISSING: flow
64+
SINK(d["key"]) # $ flow="SOURCE, l:-1 -> d['key']"
6565

6666

6767
@expects(3) # $ unresolved_call=expects(..) unresolved_call=expects(..)(..)

0 commit comments

Comments
 (0)