Skip to content

Commit 9cb83fc

Browse files
committed
python: add summaries for
copy, pop, get, getitem, setdefault. Also add read steps to taint tracking. Reading from a tainted collection can be done in two situations: 1. There is an access path. In this case a read step (possibly from a flow summary) gives rise to a taint step. 2. There is no access path. In this case an explicit taint step (possibly via a flow summary) should exist.
1 parent 144df9a commit 9cb83fc

File tree

36 files changed

+963
-99
lines changed

36 files changed

+963
-99
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,19 @@ predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
202202
obj = nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode() and
203203
call.getArg(0) = nodeFrom
204204
)
205+
or
206+
// Although flow through collections is modeled precisely using stores/reads, we still
207+
// allow flow out of a _tainted_ collection. This is needed in order to support taint-
208+
// tracking configurations where the source is a collection.
209+
exists(DataFlow::Content c | DataFlowPrivate::readStep(nodeFrom, c, nodeTo) |
210+
// c instanceof DataFlow::ListElementContent
211+
// or
212+
// c instanceof DataFlow::SetElementContent
213+
// or
214+
c instanceof DataFlow::DictionaryElementContent
215+
// or
216+
// c instanceof DataFlow::DictionaryElementAnyContent
217+
)
205218
}
206219

207220
/**

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3939,6 +3939,176 @@ private module StdlibPrivate {
39393939
}
39403940
}
39413941

3942+
// ---------------------------------------------------------------------------
3943+
// Flow summaries for container methods
3944+
// ---------------------------------------------------------------------------
3945+
/** A flow summary for `copy`. */
3946+
class CopySummary extends SummarizedCallable {
3947+
CopySummary() { this = "collection.copy" }
3948+
3949+
override DataFlow::CallCfgNode getACall() {
3950+
result.(DataFlow::MethodCallNode).getMethodName() = "copy"
3951+
}
3952+
3953+
override DataFlow::ArgumentNode getACallback() { none() }
3954+
3955+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3956+
exists(string content |
3957+
content = "ListElement"
3958+
or
3959+
content = "SetElement"
3960+
or
3961+
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
3962+
content = "TupleElement[" + i.toString() + "]"
3963+
)
3964+
or
3965+
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
3966+
content = "DictionaryElement[" + key + "]"
3967+
)
3968+
|
3969+
input = "Argument[self]." + content and
3970+
output = "ReturnValue." + content and
3971+
preservesValue = true
3972+
)
3973+
or
3974+
input = "Argument[self]" and
3975+
output = "ReturnValue" and
3976+
preservesValue = false
3977+
}
3978+
}
3979+
3980+
/**
3981+
* A flow summary for `pop` either for list or set.
3982+
* This ignores the index if given, since content is
3983+
* imprecise anyway.
3984+
*
3985+
* It also handles the default value when `pop` is called
3986+
* on a dictionary, since that also does not depend on the key.
3987+
*/
3988+
class PopSummary extends SummarizedCallable {
3989+
PopSummary() { this = "collection.pop" }
3990+
3991+
override DataFlow::CallCfgNode getACall() {
3992+
result.(DataFlow::MethodCallNode).getMethodName() = "pop"
3993+
}
3994+
3995+
override DataFlow::ArgumentNode getACallback() { none() }
3996+
3997+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3998+
input = "Argument[self].ListElement" and
3999+
output = "ReturnValue" and
4000+
preservesValue = true
4001+
or
4002+
input = "Argument[self].SetElement" and
4003+
output = "ReturnValue" and
4004+
preservesValue = true
4005+
or
4006+
// default value for dictionary
4007+
input = "Argument[1]" and
4008+
output = "ReturnValue" and
4009+
preservesValue = true
4010+
or
4011+
// transfer taint on self to return value
4012+
input = "Argument[self]" and
4013+
output = "ReturnValue" and
4014+
preservesValue = false
4015+
}
4016+
}
4017+
4018+
/** A flow summary for `dict.pop` */
4019+
class DictPopSummary extends SummarizedCallable {
4020+
string key;
4021+
4022+
DictPopSummary() {
4023+
this = "dict.pop(" + key + ")" and
4024+
exists(DataFlow::DictionaryElementContent dc | key = dc.getKey())
4025+
}
4026+
4027+
override DataFlow::CallCfgNode getACall() {
4028+
result.(DataFlow::MethodCallNode).getMethodName() = "pop" and
4029+
result.getArg(0).getALocalSource().asExpr().(StrConst).getText() = key
4030+
}
4031+
4032+
override DataFlow::ArgumentNode getACallback() { none() }
4033+
4034+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4035+
input = "Argument[self].DictionaryElement[" + key + "]" and
4036+
output = "ReturnValue" and
4037+
preservesValue = true
4038+
}
4039+
}
4040+
4041+
/** A flow summary for `dict.get` at specific content. */
4042+
class DictGetSummary extends SummarizedCallable {
4043+
string key;
4044+
4045+
DictGetSummary() {
4046+
this = "dict.get(" + key + ")" and
4047+
exists(DataFlow::DictionaryElementContent dc | key = dc.getKey())
4048+
}
4049+
4050+
override DataFlow::CallCfgNode getACall() {
4051+
result.(DataFlow::MethodCallNode).getMethodName() = "get" and
4052+
result.getArg(0).getALocalSource().asExpr().(StrConst).getText() = key
4053+
}
4054+
4055+
override DataFlow::ArgumentNode getACallback() { none() }
4056+
4057+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4058+
input = "Argument[self].DictionaryElement[" + key + "]" and
4059+
output = "ReturnValue" and
4060+
preservesValue = true
4061+
or
4062+
// optional default value
4063+
input = "Argument[1]" and
4064+
output = "ReturnValue" and
4065+
preservesValue = true
4066+
}
4067+
}
4068+
4069+
/** A flow summary for `dict.get` disregarding content. */
4070+
class DictGetAnySummary extends SummarizedCallable {
4071+
DictGetAnySummary() { this = "dict.get" }
4072+
4073+
override DataFlow::CallCfgNode getACall() {
4074+
result.(DataFlow::MethodCallNode).getMethodName() = "get"
4075+
}
4076+
4077+
override DataFlow::ArgumentNode getACallback() { none() }
4078+
4079+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4080+
// default value
4081+
input = "Argument[1]" and
4082+
output = "ReturnValue" and
4083+
preservesValue = true
4084+
or
4085+
// transfer taint from self to return value
4086+
input = "Argument[self]" and
4087+
output = "ReturnValue" and
4088+
preservesValue = false
4089+
}
4090+
}
4091+
4092+
/** A flow summary for `dict.popitem` */
4093+
class DictPopitemSummary extends SummarizedCallable {
4094+
DictPopitemSummary() { this = "dict.popitem" }
4095+
4096+
override DataFlow::CallCfgNode getACall() {
4097+
result.(DataFlow::MethodCallNode).getMethodName() = "popitem"
4098+
}
4099+
4100+
override DataFlow::ArgumentNode getACallback() { none() }
4101+
4102+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4103+
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
4104+
input = "Argument[self].DictionaryElement[" + key + "]" and
4105+
output = "ReturnValue.TupleElement[1]" and
4106+
preservesValue = true
4107+
// TODO: put `key` into "ReturnValue.TupleElement[0]"
4108+
)
4109+
}
4110+
}
4111+
39424112
/**
39434113
* A flow summary for `dict.setdefault`.
39444114
*
@@ -3962,6 +4132,40 @@ private module StdlibPrivate {
39624132
preservesValue = true
39634133
}
39644134
}
4135+
4136+
/**
4137+
* A flow summary for `dict.setdefault` at a specific key.
4138+
* See https://docs.python.org/3.10/library/stdtypes.html#dict.setdefault
4139+
* This summary handles read and store steps. See `DictSetdefaultSummary`
4140+
* for the dataflow steps.
4141+
*/
4142+
class DictSetdefaultKeySummary extends SummarizedCallable {
4143+
string key;
4144+
4145+
DictSetdefaultKeySummary() {
4146+
this = "dict.setdefault(" + key + ")" and
4147+
exists(DataFlow::DictionaryElementContent dc | key = dc.getKey())
4148+
}
4149+
4150+
override DataFlow::CallCfgNode getACall() {
4151+
result.(DataFlow::MethodCallNode).getMethodName() = "setdefault" and
4152+
result.getArg(0).getALocalSource().asExpr().(StrConst).getText() = key
4153+
}
4154+
4155+
override DataFlow::ArgumentNode getACallback() { none() }
4156+
4157+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4158+
// If key is in the dictionary, return its value.
4159+
input = "Argument[self].DictionaryElement[" + key + "]" and
4160+
output = "ReturnValue" and
4161+
preservesValue = true
4162+
or
4163+
// If not, insert key with a value of default.
4164+
input = "Argument[1]" and
4165+
output = "ReturnValue.DictionaryElement[" + key + "]" and
4166+
preservesValue = true
4167+
}
4168+
}
39654169
}
39664170

39674171
// ---------------------------------------------------------------------------

python/ql/test/experimental/dataflow/coverage/test.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,23 +123,23 @@ def test_nested_list_display():
123123
# 6.2.6. Set displays
124124
def test_set_display():
125125
x = {SOURCE}
126-
SINK(x.pop()) #$ MISSING:flow="SOURCE, l:-1 -> x.pop()"
126+
SINK(x.pop()) #$ flow="SOURCE, l:-1 -> x.pop()"
127127

128128

129129
def test_set_comprehension():
130130
x = {SOURCE for y in [NONSOURCE]}
131-
SINK(x.pop()) #$ MISSING:flow="SOURCE, l:-1 -> x.pop()"
131+
SINK(x.pop()) #$ flow="SOURCE, l:-1 -> x.pop()"
132132

133133

134134
def test_set_comprehension_flow():
135135
x = {y for y in [SOURCE]}
136-
SINK(x.pop()) #$ MISSING:flow="SOURCE, l:-1 -> x.pop()"
136+
SINK(x.pop()) #$ flow="SOURCE, l:-1 -> x.pop()"
137137

138138

139139
def test_set_comprehension_inflow():
140140
l = {SOURCE}
141141
x = {y for y in l}
142-
SINK(x.pop()) #$ MISSING:flow="SOURCE, l:-2 -> x.pop()"
142+
SINK(x.pop()) #$ flow="SOURCE, l:-2 -> x.pop()"
143143

144144

145145
def test_nested_set_display():
@@ -155,7 +155,7 @@ def test_dict_display():
155155

156156
def test_dict_display_pop():
157157
x = {"s": SOURCE}
158-
SINK(x.pop("s")) #$ MISSING:flow="SOURCE, l:-1 -> x.pop(..)"
158+
SINK(x.pop("s")) #$ flow="SOURCE, l:-1 -> x.pop(..)"
159159

160160

161161
def test_dict_comprehension():

python/ql/test/experimental/dataflow/coverage/test_builtins.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -100,19 +100,19 @@ def test_set_from_list():
100100
l = [SOURCE]
101101
s = set(l)
102102
v = s.pop()
103-
SINK(v) #$ MISSING:flow="SOURCE, l:-3 -> v"
103+
SINK(v) #$ flow="SOURCE, l:-3 -> v"
104104

105105
def test_set_from_tuple():
106106
t = (SOURCE,)
107107
s = set(t)
108108
v = s.pop()
109-
SINK(v) #$ MISSING:flow="SOURCE, l:-3 -> v"
109+
SINK(v) #$ flow="SOURCE, l:-3 -> v"
110110

111111
def test_set_from_set():
112112
s0 = {SOURCE}
113113
s = set(s0)
114114
v = s.pop()
115-
SINK(v) #$ MISSING:flow="SOURCE, l:-3 -> v"
115+
SINK(v) #$ flow="SOURCE, l:-3 -> v"
116116

117117
def test_set_from_dict():
118118
d = {SOURCE: "val"}
@@ -149,24 +149,24 @@ def test_dict_from_dict():
149149
def test_list_pop():
150150
l = [SOURCE]
151151
v = l.pop()
152-
SINK(v) #$ MISSING:flow="SOURCE, l:-2 -> v"
152+
SINK(v) #$ flow="SOURCE, l:-2 -> v"
153153

154154
def test_list_pop_index():
155155
l = [SOURCE]
156156
v = l.pop(0)
157-
SINK(v) #$ MISSING: flow="SOURCE, l:-2 -> v"
157+
SINK(v) #$ flow="SOURCE, l:-2 -> v"
158158

159159
def test_list_pop_index_imprecise():
160160
l = [SOURCE, NONSOURCE]
161161
v = l.pop(1)
162-
SINK_F(v)
162+
SINK_F(v) #$ SPURIOUS: flow="SOURCE, l:-2 -> v"
163163

164164
@expects(2)
165165
def test_list_copy():
166166
l0 = [SOURCE, NONSOURCE]
167167
l = l0.copy()
168-
SINK(l[0]) #$ MISSING: flow="SOURCE, l:-2 -> l[0]"
169-
SINK_F(l[1])
168+
SINK(l[0]) #$ flow="SOURCE, l:-2 -> l[0]"
169+
SINK_F(l[1]) #$ SPURIOUS: flow="SOURCE, l:-3 -> l[1]"
170170

171171
def test_list_append():
172172
l = [NONSOURCE]
@@ -178,12 +178,12 @@ def test_list_append():
178178
def test_set_pop():
179179
s = {SOURCE}
180180
v = s.pop()
181-
SINK(v) #$ MISSING:flow="SOURCE, l:-2 -> v"
181+
SINK(v) #$ flow="SOURCE, l:-2 -> v"
182182

183183
def test_set_copy():
184184
s0 = {SOURCE}
185185
s = s0.copy()
186-
SINK(s.pop()) #$ MISSING: flow="SOURCE, l:-2 -> s.pop()"
186+
SINK(s.pop()) #$ flow="SOURCE, l:-2 -> s.pop()"
187187

188188
def test_set_add():
189189
s = set([])
@@ -218,32 +218,32 @@ def test_dict_items():
218218
def test_dict_pop():
219219
d = {'k': SOURCE}
220220
v = d.pop("k")
221-
SINK(v) #$ MISSING:flow="SOURCE, l:-2 -> v"
221+
SINK(v) #$ flow="SOURCE, l:-2 -> v"
222222
v1 = d.pop("k", NONSOURCE)
223-
SINK_F(v1)
223+
SINK_F(v1) #$ SPURIOUS: flow="SOURCE, l:-4 -> v1"
224224
v2 = d.pop("non-existing", SOURCE)
225-
SINK(v2) #$ MISSING: flow="SOURCE, l:-1 -> v2"
225+
SINK(v2) #$ flow="SOURCE, l:-1 -> v2"
226226

227227
@expects(2)
228228
def test_dict_get():
229229
d = {'k': SOURCE}
230230
v = d.get("k")
231-
SINK(v) #$ MISSING:flow="SOURCE, l:-2 -> v"
231+
SINK(v) #$ flow="SOURCE, l:-2 -> v"
232232
v1 = d.get("non-existing", SOURCE)
233-
SINK(v1) #$ MISSING: flow="SOURCE, l:-1 -> v1"
233+
SINK(v1) #$ flow="SOURCE, l:-1 -> v1"
234234

235235
@expects(2)
236236
def test_dict_popitem():
237237
d = {'k': SOURCE}
238238
t = d.popitem() # could be any pair (before 3.7), but we only have one
239239
SINK_F(t[0])
240-
SINK(t[1]) #$ MISSING: flow="SOURCE, l:-3 -> t[1]"
240+
SINK(t[1]) #$ flow="SOURCE, l:-3 -> t[1]"
241241

242242
@expects(2)
243243
def test_dict_copy():
244244
d = {'k': SOURCE, 'k1': NONSOURCE}
245245
d1 = d.copy()
246-
SINK(d1["k"]) #$ MISSING: flow="SOURCE, l:-2 -> d[k]"
246+
SINK(d1["k"]) #$ flow="SOURCE, l:-2 -> d1['k']"
247247
SINK_F(d1["k1"])
248248

249249

0 commit comments

Comments
 (0)