Skip to content

Commit 1d65284

Browse files
authored
Merge pull request github#13209 from yoff/python/container-summaries-2
python: Container summaries, part 2
2 parents 7280f07 + 775f3ea commit 1d65284

File tree

18 files changed

+407
-146
lines changed

18 files changed

+407
-146
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -794,14 +794,10 @@ predicate defaultValueFlowStep(CfgNode nodeFrom, CfgNode nodeTo) {
794794
predicate readStep(Node nodeFrom, Content c, Node nodeTo) {
795795
subscriptReadStep(nodeFrom, c, nodeTo)
796796
or
797-
dictReadStep(nodeFrom, c, nodeTo)
798-
or
799797
iterableUnpackingReadStep(nodeFrom, c, nodeTo)
800798
or
801799
matchReadStep(nodeFrom, c, nodeTo)
802800
or
803-
popReadStep(nodeFrom, c, nodeTo)
804-
or
805801
forReadStep(nodeFrom, c, nodeTo)
806802
or
807803
attributeReadStep(nodeFrom, c, nodeTo)
@@ -834,51 +830,6 @@ predicate subscriptReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
834830
)
835831
}
836832

837-
predicate dictReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
838-
// see
839-
// - https://docs.python.org/3.10/library/stdtypes.html#dict.get
840-
// - https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
841-
exists(MethodCallNode call |
842-
call.calls(nodeFrom, ["get", "setdefault"]) and
843-
call.getArg(0).asExpr().(StrConst).getText() = c.(DictionaryElementContent).getKey() and
844-
nodeTo = call
845-
)
846-
}
847-
848-
/** Data flows from a sequence to a call to `pop` on the sequence. */
849-
predicate popReadStep(CfgNode nodeFrom, Content c, CfgNode nodeTo) {
850-
// set.pop or list.pop
851-
// `s.pop()`
852-
// nodeFrom is `s`, cfg node
853-
// nodeTo is `s.pop()`, cfg node
854-
// c denotes element of list or set
855-
exists(CallNode call, AttrNode a |
856-
call.getFunction() = a and
857-
a.getName() = "pop" and // Should match appropriate call since we tracked a sequence here.
858-
not exists(call.getAnArg()) and
859-
nodeFrom.getNode() = a.getObject() and
860-
nodeTo.getNode() = call and
861-
(
862-
c instanceof ListElementContent
863-
or
864-
c instanceof SetElementContent
865-
)
866-
)
867-
or
868-
// dict.pop
869-
// `d.pop("key")`
870-
// nodeFrom is `d`, cfg node
871-
// nodeTo is `d.pop("key")`, cfg node
872-
// c denotes the key `"key"`
873-
exists(CallNode call, AttrNode a |
874-
call.getFunction() = a and
875-
a.getName() = "pop" and // Should match appropriate call since we tracked a dictionary here.
876-
nodeFrom.getNode() = a.getObject() and
877-
nodeTo.getNode() = call and
878-
c.(DictionaryElementContent).getKey() = call.getArg(0).getNode().(StrConst).getS()
879-
)
880-
}
881-
882833
predicate forReadStep(CfgNode nodeFrom, Content c, Node nodeTo) {
883834
exists(ForTarget target |
884835
nodeFrom.asExpr() = target.getSource() and

python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -190,14 +190,9 @@ predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
190190
call.getArg(0) = nodeFrom
191191
)
192192
or
193-
// methods
193+
// dict methods
194194
exists(DataFlow::MethodCallNode call, string methodName | call = nodeTo |
195-
methodName in [
196-
// general
197-
"copy", "pop",
198-
// dict
199-
"values", "items", "get", "popitem"
200-
] and
195+
methodName in ["values", "items"] and
201196
call.calls(nodeFrom, methodName)
202197
)
203198
or

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3900,6 +3900,176 @@ private module StdlibPrivate {
39003900
}
39013901
}
39023902

3903+
// ---------------------------------------------------------------------------
3904+
// Flow summaries for container methods
3905+
// ---------------------------------------------------------------------------
3906+
/** A flow summary for `copy`. */
3907+
class CopySummary extends SummarizedCallable {
3908+
CopySummary() { this = "collection.copy" }
3909+
3910+
override DataFlow::CallCfgNode getACall() {
3911+
result.(DataFlow::MethodCallNode).getMethodName() = "copy"
3912+
}
3913+
3914+
override DataFlow::ArgumentNode getACallback() { none() }
3915+
3916+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3917+
exists(string content |
3918+
content = "ListElement"
3919+
or
3920+
content = "SetElement"
3921+
or
3922+
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
3923+
content = "TupleElement[" + i.toString() + "]"
3924+
)
3925+
or
3926+
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
3927+
content = "DictionaryElement[" + key + "]"
3928+
)
3929+
|
3930+
input = "Argument[self]." + content and
3931+
output = "ReturnValue." + content and
3932+
preservesValue = true
3933+
)
3934+
or
3935+
input = "Argument[self]" and
3936+
output = "ReturnValue" and
3937+
preservesValue = true
3938+
}
3939+
}
3940+
3941+
/**
3942+
* A flow summary for `pop` either for list or set.
3943+
* This ignores the index if given, since content is
3944+
* imprecise anyway.
3945+
*
3946+
* I also handles the default value when `pop` is called
3947+
* on a dictionary, since that also does not depend on the key.
3948+
*/
3949+
class PopSummary extends SummarizedCallable {
3950+
PopSummary() { this = "collection.pop" }
3951+
3952+
override DataFlow::CallCfgNode getACall() {
3953+
result.(DataFlow::MethodCallNode).getMethodName() = "pop"
3954+
}
3955+
3956+
override DataFlow::ArgumentNode getACallback() { none() }
3957+
3958+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3959+
input = "Argument[self].ListElement" and
3960+
output = "ReturnValue" and
3961+
preservesValue = true
3962+
or
3963+
input = "Argument[self].SetElement" and
3964+
output = "ReturnValue" and
3965+
preservesValue = true
3966+
or
3967+
// default value for dictionary
3968+
input = "Argument[1]" and
3969+
output = "ReturnValue" and
3970+
preservesValue = true
3971+
or
3972+
// transfer taint on self to return value
3973+
input = "Argument[self]" and
3974+
output = "ReturnValue" and
3975+
preservesValue = false
3976+
}
3977+
}
3978+
3979+
/** A flow summary for `dict.pop` */
3980+
class DictPopSummary extends SummarizedCallable {
3981+
string key;
3982+
3983+
DictPopSummary() {
3984+
this = "dict.pop(" + key + ")" and
3985+
exists(DataFlow::DictionaryElementContent dc | key = dc.getKey())
3986+
}
3987+
3988+
override DataFlow::CallCfgNode getACall() {
3989+
result.(DataFlow::MethodCallNode).getMethodName() = "pop" and
3990+
result.getArg(0).getALocalSource().asExpr().(StrConst).getText() = key
3991+
}
3992+
3993+
override DataFlow::ArgumentNode getACallback() { none() }
3994+
3995+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3996+
input = "Argument[self].DictionaryElement[" + key + "]" and
3997+
output = "ReturnValue" and
3998+
preservesValue = true
3999+
}
4000+
}
4001+
4002+
/** A flow summary for `dict.get` at specific content. */
4003+
class DictGetSummary extends SummarizedCallable {
4004+
string key;
4005+
4006+
DictGetSummary() {
4007+
this = "dict.get(" + key + ")" and
4008+
exists(DataFlow::DictionaryElementContent dc | key = dc.getKey())
4009+
}
4010+
4011+
override DataFlow::CallCfgNode getACall() {
4012+
result.(DataFlow::MethodCallNode).getMethodName() = "get" and
4013+
result.getArg(0).getALocalSource().asExpr().(StrConst).getText() = key
4014+
}
4015+
4016+
override DataFlow::ArgumentNode getACallback() { none() }
4017+
4018+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4019+
input = "Argument[self].DictionaryElement[" + key + "]" and
4020+
output = "ReturnValue" and
4021+
preservesValue = true
4022+
or
4023+
// optional default value
4024+
input = "Argument[1]" and
4025+
output = "ReturnValue" and
4026+
preservesValue = true
4027+
}
4028+
}
4029+
4030+
/** A flow summary for `dict.get` disregarding content. */
4031+
class DictGetAnySummary extends SummarizedCallable {
4032+
DictGetAnySummary() { this = "dict.get" }
4033+
4034+
override DataFlow::CallCfgNode getACall() {
4035+
result.(DataFlow::MethodCallNode).getMethodName() = "get"
4036+
}
4037+
4038+
override DataFlow::ArgumentNode getACallback() { none() }
4039+
4040+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4041+
// default value
4042+
input = "Argument[1]" and
4043+
output = "ReturnValue" and
4044+
preservesValue = true
4045+
or
4046+
// transfer taint from self to return value
4047+
input = "Argument[self]" and
4048+
output = "ReturnValue" and
4049+
preservesValue = false
4050+
}
4051+
}
4052+
4053+
/** A flow summary for `dict.popitem` */
4054+
class DictPopitemSummary extends SummarizedCallable {
4055+
DictPopitemSummary() { this = "dict.popitem" }
4056+
4057+
override DataFlow::CallCfgNode getACall() {
4058+
result.(DataFlow::MethodCallNode).getMethodName() = "popitem"
4059+
}
4060+
4061+
override DataFlow::ArgumentNode getACallback() { none() }
4062+
4063+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4064+
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
4065+
input = "Argument[self].DictionaryElement[" + key + "]" and
4066+
output = "ReturnValue.TupleElement[1]" and
4067+
preservesValue = true
4068+
// TODO: put `key` into "ReturnValue.TupleElement[0]"
4069+
)
4070+
}
4071+
}
4072+
39034073
/**
39044074
* A flow summary for `dict.setdefault`.
39054075
*
@@ -3923,6 +4093,40 @@ private module StdlibPrivate {
39234093
preservesValue = true
39244094
}
39254095
}
4096+
4097+
/**
4098+
* A flow summary for `dict.setdefault` at specific content.
4099+
* See https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
4100+
* This summary handles read and store steps. See `DictSetdefaultSummary`
4101+
* for the dataflow steps.
4102+
*/
4103+
class DictSetdefaultKeySummary extends SummarizedCallable {
4104+
string key;
4105+
4106+
DictSetdefaultKeySummary() {
4107+
this = "dict.setdefault(" + key + ")" and
4108+
exists(DataFlow::DictionaryElementContent dc | key = dc.getKey())
4109+
}
4110+
4111+
override DataFlow::CallCfgNode getACall() {
4112+
result.(DataFlow::MethodCallNode).getMethodName() = "setdefault" and
4113+
result.getArg(0).getALocalSource().asExpr().(StrConst).getText() = key
4114+
}
4115+
4116+
override DataFlow::ArgumentNode getACallback() { none() }
4117+
4118+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4119+
// If key is in the dictionary, return its value.
4120+
input = "Argument[self].DictionaryElement[" + key + "]" and
4121+
output = "ReturnValue" and
4122+
preservesValue = true
4123+
or
4124+
// If not, insert key with a value of default.
4125+
input = "Argument[1]" and
4126+
output = "ReturnValue.DictionaryElement[" + key + "]" and
4127+
preservesValue = true
4128+
}
4129+
}
39264130
}
39274131

39284132
// ---------------------------------------------------------------------------

python/ql/test/experimental/dataflow/coverage/test_builtins.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def test_list_from_set():
5757
s = {SOURCE}
5858
l = list(s)
5959
SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]"
60-
60+
6161
@expects(2)
6262
def test_list_from_dict():
6363
d = {SOURCE: 'v', NONSOURCE: 'v2'}
@@ -154,19 +154,19 @@ def test_list_pop():
154154
def test_list_pop_index():
155155
l = [SOURCE]
156156
v = l.pop(0)
157-
SINK(v) #$ MISSING: flow="SOURCE, l:-2 -> v"
157+
SINK(v) #$ flow="SOURCE, l:-2 -> v"
158158

159159
def test_list_pop_index_imprecise():
160160
l = [SOURCE, NONSOURCE]
161161
v = l.pop(1)
162-
SINK_F(v)
162+
SINK_F(v) #$ SPURIOUS: flow="SOURCE, l:-2 -> v"
163163

164164
@expects(2)
165165
def test_list_copy():
166166
l0 = [SOURCE, NONSOURCE]
167167
l = l0.copy()
168-
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]"
169-
SINK_F(l[1])
168+
SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]"
169+
SINK_F(l[1]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l[1]"
170170

171171
def test_list_append():
172172
l = [NONSOURCE]
@@ -183,7 +183,7 @@ def test_set_pop():
183183
def test_set_copy():
184184
s0 = {SOURCE}
185185
s = s0.copy()
186-
SINK(s.pop()) #$ MISSING: flow="SOURCE, l:-2 -> s.pop()"
186+
SINK(s.pop()) #$ flow="SOURCE, l:-2 -> s.pop()"
187187

188188
def test_set_add():
189189
s = set([])
@@ -222,28 +222,31 @@ def test_dict_pop():
222222
v1 = d.pop("k", NONSOURCE)
223223
SINK_F(v1) #$ SPURIOUS: flow="SOURCE, l:-4 -> v1"
224224
v2 = d.pop("non-existing", SOURCE)
225-
SINK(v2) #$ MISSING: flow="SOURCE, l:-1 -> v2"
225+
SINK(v2) #$ flow="SOURCE, l:-1 -> v2"
226226

227-
@expects(2)
227+
@expects(3)
228228
def test_dict_get():
229229
d = {'k': SOURCE}
230230
v = d.get("k")
231231
SINK(v) #$ flow="SOURCE, l:-2 -> v"
232232
v1 = d.get("non-existing", SOURCE)
233-
SINK(v1) #$ MISSING: flow="SOURCE, l:-1 -> v1"
233+
SINK(v1) #$ flow="SOURCE, l:-1 -> v1"
234+
k = "k"
235+
v2 = d.get(k)
236+
SINK(v2) #$ flow="SOURCE, l:-7 -> v2"
234237

235238
@expects(2)
236239
def test_dict_popitem():
237240
d = {'k': SOURCE}
238241
t = d.popitem() # could be any pair (before 3.7), but we only have one
239242
SINK_F(t[0])
240-
SINK(t[1]) #$ MISSING: flow="SOURCE, l:-3 -> t[1]"
243+
SINK(t[1]) #$ flow="SOURCE, l:-3 -> t[1]"
241244

242245
@expects(2)
243246
def test_dict_copy():
244247
d = {'k': SOURCE, 'k1': NONSOURCE}
245248
d1 = d.copy()
246-
SINK(d1["k"]) #$ MISSING: flow="SOURCE, l:-2 -> d[k]"
249+
SINK(d1["k"]) #$ flow="SOURCE, l:-2 -> d1['k']"
247250
SINK_F(d1["k1"])
248251

249252

@@ -354,4 +357,4 @@ def test_next_dict():
354357
d = {SOURCE: "val"}
355358
i = iter(d)
356359
n = next(i)
357-
SINK(n) #$ MISSING: flow="SOURCE, l:-3 -> n"
360+
SINK(n) #$ MISSING: flow="SOURCE, l:-3 -> n"

0 commit comments

Comments
 (0)