Skip to content

Commit 4b4b9bf

Browse files
committed
python: add missing summaries
For append/add: The new results in the experimental tar slip query show that we do not recognize the sanitisers.
1 parent b72c93f commit 4b4b9bf

File tree

11 files changed

+388
-47
lines changed

11 files changed

+388
-47
lines changed

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 276 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3883,6 +3883,9 @@ private module StdlibPrivate {
38833883
}
38843884
}
38853885

3886+
// ---------------------------------------------------------------------------
3887+
// Flow summaries for functions operating on containers
3888+
// ---------------------------------------------------------------------------
38863889
/** A flow summary for `reversed`. */
38873890
class ReversedSummary extends SummarizedCallable {
38883891
ReversedSummary() { this = "builtins.reversed" }
@@ -3894,9 +3897,114 @@ private module StdlibPrivate {
38943897
}
38953898

38963899
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3897-
input = "Argument[0].ListElement" and
3900+
(
3901+
input = "Argument[0].ListElement"
3902+
or
3903+
input = "Argument[0].SetElement"
3904+
or
3905+
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
3906+
input = "Argument[0].TupleElement[" + i.toString() + "]"
3907+
)
3908+
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
3909+
) and
38983910
output = "ReturnValue.ListElement" and
38993911
preservesValue = true
3912+
or
3913+
input = "Argument[0]" and
3914+
output = "ReturnValue" and
3915+
preservesValue = false
3916+
}
3917+
}
3918+
3919+
/**
* A flow summary for `sorted`.
*
* Value-preserving: list, set and tuple element content of `Argument[0]` is mapped to
* list element content of the return value.
* Non-value-preserving (taint): `Argument[0]` itself flows to the return value.
*/
3920+
class SortedSummary extends SummarizedCallable {
3921+
SortedSummary() { this = "builtins.sorted" }
3922+
3923+
override DataFlow::CallCfgNode getACall() { result = API::builtin("sorted").getACall() }
3924+
3925+
override DataFlow::ArgumentNode getACallback() {
3926+
result = API::builtin("sorted").getAValueReachableFromSource()
3927+
}
3928+
3929+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// `content` ranges over the supported element kinds of the input container: list
// elements, set elements, and one tuple element per index that has a
// `TupleElementContent`.
3930+
exists(string content |
3931+
content = "ListElement"
3932+
or
3933+
content = "SetElement"
3934+
or
3935+
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
3936+
content = "TupleElement[" + i.toString() + "]"
3937+
)
3938+
|
3939+
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
3940+
input = "Argument[0]." + content and
3941+
output = "ReturnValue.ListElement" and
3942+
preservesValue = true
3943+
)
// Taint step: the container itself to the result.
3944+
or
3945+
input = "Argument[0]" and
3946+
output = "ReturnValue" and
3947+
preservesValue = false
3948+
}
3949+
}
3950+
3951+
/**
* A flow summary for `iter`.
*
* The result is modelled with list element content: list, set and tuple element content
* of `Argument[0]` is mapped to `ReturnValue.ListElement` (value-preserving).
* In addition, `Argument[0]` itself flows to the return value as a non-value-preserving
* (taint) step.
*/
3952+
class IterSummary extends SummarizedCallable {
3953+
IterSummary() { this = "builtins.iter" }
3954+
3955+
override DataFlow::CallCfgNode getACall() { result = API::builtin("iter").getACall() }
3956+
3957+
override DataFlow::ArgumentNode getACallback() {
3958+
result = API::builtin("iter").getAValueReachableFromSource()
3959+
}
3960+
3961+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// Supported input contents: list elements, set elements, and one tuple element per
// index that has a `TupleElementContent`.
3962+
(
3963+
input = "Argument[0].ListElement"
3964+
or
3965+
input = "Argument[0].SetElement"
3966+
or
3967+
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
3968+
input = "Argument[0].TupleElement[" + i.toString() + "]"
3969+
)
3970+
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
3971+
) and
3972+
output = "ReturnValue.ListElement" and
3973+
preservesValue = true
// Taint step: the container itself to the result.
3974+
or
3975+
input = "Argument[0]" and
3976+
output = "ReturnValue" and
3977+
preservesValue = false
3978+
}
3979+
}
3980+
3981+
/**
* A flow summary for `next`.
*
* List, set and tuple element content of `Argument[0]` flows to the return value itself
* (not to element content of it), and `Argument[1]` flows to the return value. Both
* steps are value-preserving.
*
* NOTE(review): unlike the `sorted` and `iter` summaries, there is no
* `preservesValue = false` step from `Argument[0]` to `ReturnValue` here; confirm that
* this is intended.
*/
3982+
class NextSummary extends SummarizedCallable {
3983+
NextSummary() { this = "builtins.next" }
3984+
3985+
override DataFlow::CallCfgNode getACall() { result = API::builtin("next").getACall() }
3986+
3987+
override DataFlow::ArgumentNode getACallback() {
3988+
result = API::builtin("next").getAValueReachableFromSource()
3989+
}
3990+
3991+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// An element of the input container is returned directly.
3992+
(
3993+
input = "Argument[0].ListElement"
3994+
or
3995+
input = "Argument[0].SetElement"
3996+
or
3997+
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
3998+
input = "Argument[0].TupleElement[" + i.toString() + "]"
3999+
)
4000+
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
4001+
) and
4002+
output = "ReturnValue" and
4003+
preservesValue = true
// In Python, the second argument of `next(iterator, default)` is the default that is
// returned unchanged when the iterator is exhausted.
4004+
or
4005+
input = "Argument[1]" and
4006+
output = "ReturnValue" and
4007+
preservesValue = true
39004008
}
39014009
}
39024010

@@ -4127,6 +4235,173 @@ private module StdlibPrivate {
41274235
preservesValue = true
41284236
}
41294237
}
4238+
4239+
/**
4240+
* A flow summary for `dict.values`.
4241+
*
* For each key that has a `DictionaryElementContent`, the value stored under that key
* on the receiver is mapped to list element content of the return value
* (value-preserving). The receiver itself flows to the return value as a
* non-value-preserving (taint) step.
*
* NOTE(review): `getACall` and `getACallback` match on the name `values` only (the
* receiver is the wildcard `_`), so matching is not restricted to `dict` receivers.
*
4242+
* See https://docs.python.org/3.10/library/stdtypes.html#dict.values
4243+
*/
4244+
class DictValues extends SummarizedCallable {
4245+
DictValues() { this = "dict.values" }
4246+
4247+
override DataFlow::CallCfgNode getACall() {
4248+
result.(DataFlow::MethodCallNode).calls(_, "values")
4249+
}
4250+
4251+
override DataFlow::ArgumentNode getACallback() {
4252+
result.(DataFlow::AttrRead).getAttributeName() = "values"
4253+
}
4254+
4255+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// One value-preserving step per known dictionary key.
4256+
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
4257+
input = "Argument[self].DictionaryElement[" + key + "]" and
4258+
output = "ReturnValue.ListElement" and
4259+
preservesValue = true
4260+
)
// Taint step: the receiver itself to the result.
4261+
or
4262+
input = "Argument[self]" and
4263+
output = "ReturnValue" and
4264+
preservesValue = false
4265+
}
4266+
}
4267+
4268+
/**
4269+
* A flow summary for `dict.keys`.
4270+
*
* Only a non-value-preserving (taint) step from the receiver to the return value is
* modelled; key content is not tracked yet (see the TODO in `propagatesFlowExt`).
*
* NOTE(review): `getACall` and `getACallback` match on the name `keys` only (the
* receiver is the wildcard `_`), so matching is not restricted to `dict` receivers.
*
4271+
* See https://docs.python.org/3.10/library/stdtypes.html#dict.keys
4272+
*/
4273+
class DictKeys extends SummarizedCallable {
4274+
DictKeys() { this = "dict.keys" }
4275+
4276+
override DataFlow::CallCfgNode getACall() { result.(DataFlow::MethodCallNode).calls(_, "keys") }
4277+
4278+
override DataFlow::ArgumentNode getACallback() {
4279+
result.(DataFlow::AttrRead).getAttributeName() = "keys"
4280+
}
4281+
4282+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4283+
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
// Until then: taint on the receiver taints the result.
4284+
input = "Argument[self]" and
4285+
output = "ReturnValue" and
4286+
preservesValue = false
4287+
}
4288+
}
4289+
4290+
/**
4291+
* A flow summary for `dict.items`.
4292+
*
* For each key that has a `DictionaryElementContent`, the value stored under that key
* on the receiver is mapped to tuple index 1 of the list elements of the return value
* (value-preserving). The receiver itself flows to the return value as a
* non-value-preserving (taint) step. Keys are not yet added to the output (see the
* TODO in `propagatesFlowExt`).
*
* NOTE(review): `getACall` and `getACallback` match on the name `items` only (the
* receiver is the wildcard `_`), so matching is not restricted to `dict` receivers.
*
4293+
* See https://docs.python.org/3.10/library/stdtypes.html#dict.items
4294+
*/
4295+
class DictItems extends SummarizedCallable {
4296+
DictItems() { this = "dict.items" }
4297+
4298+
override DataFlow::CallCfgNode getACall() {
4299+
result.(DataFlow::MethodCallNode).calls(_, "items")
4300+
}
4301+
4302+
override DataFlow::ArgumentNode getACallback() {
4303+
result.(DataFlow::AttrRead).getAttributeName() = "items"
4304+
}
4305+
4306+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
// One value-preserving step per known dictionary key; the value lands at index 1 of
// each result tuple.
4307+
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
4308+
input = "Argument[self].DictionaryElement[" + key + "]" and
4309+
output = "ReturnValue.ListElement.TupleElement[1]" and
4310+
preservesValue = true
4311+
)
4312+
or
4313+
// TODO: Add the keys to output list
// Taint step: the receiver itself to the result.
4314+
input = "Argument[self]" and
4315+
output = "ReturnValue" and
4316+
preservesValue = false
4317+
}
4318+
}
4319+
4320+
/**
4321+
* A flow summary for `list.append`.
4322+
*
* Models the appended element (`Argument[0]`) being stored as list element content of
* the receiver, taint from the element to the receiver, and corresponding steps to the
* return value.
*
* NOTE(review): Python's `list.append` returns `None`, so the three `ReturnValue`
* steps below look like over-approximations; confirm they are intended.
*
* NOTE(review): `getACall` and `getACallback` match on the name `append` only (the
* receiver is the wildcard `_`), so matching is not restricted to `list` receivers.
*
4323+
* See https://docs.python.org/3.10/library/stdtypes.html#typesseq-mutable
4324+
*/
4325+
class ListAppend extends SummarizedCallable {
4326+
ListAppend() { this = "list.append" }
4327+
4328+
override DataFlow::CallCfgNode getACall() {
4329+
result.(DataFlow::MethodCallNode).calls(_, "append")
4330+
}
4331+
4332+
override DataFlow::ArgumentNode getACallback() {
4333+
result.(DataFlow::AttrRead).getAttributeName() = "append"
4334+
}
4335+
4336+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4337+
// existing elements of the receiver become list elements of the return value
4338+
input = "Argument[self].ListElement" and
4339+
output = "ReturnValue.ListElement" and
4340+
preservesValue = true
4341+
or
4342+
// newly added element becomes a list element of the return value
4343+
input = "Argument[0]" and
4344+
output = "ReturnValue.ListElement" and
4345+
preservesValue = true
4346+
or
4347+
// newly added element becomes a list element of the receiver
4348+
input = "Argument[0]" and
4349+
output = "Argument[self].ListElement" and
4350+
preservesValue = true
4351+
or
4352+
// transfer taint from new element to the receiver
4353+
input = "Argument[0]" and
4354+
output = "Argument[self]" and
4355+
preservesValue = false
4356+
or
4357+
// transfer taint from new element to return value
4358+
input = "Argument[0]" and
4359+
output = "ReturnValue" and
4360+
preservesValue = false
4361+
}
4362+
}
4363+
4364+
/**
4365+
* A flow summary for `set.add`.
4366+
*
* Models the added element (`Argument[0]`) being stored as set element content of the
* receiver, taint from the element to the receiver, and corresponding steps to the
* return value.
*
* NOTE(review): Python's `set.add` returns `None`, so the three `ReturnValue` steps
* below look like over-approximations; confirm they are intended.
*
* NOTE(review): `getACall` and `getACallback` match on the name `add` only (the
* receiver is the wildcard `_`), so matching is not restricted to `set` receivers.
*
4367+
* See https://docs.python.org/3.10/library/stdtypes.html#frozenset.add
4368+
*/
4369+
class SetAdd extends SummarizedCallable {
4370+
SetAdd() { this = "set.add" }
4371+
4372+
override DataFlow::CallCfgNode getACall() { result.(DataFlow::MethodCallNode).calls(_, "add") }
4373+
4374+
override DataFlow::ArgumentNode getACallback() {
4375+
result.(DataFlow::AttrRead).getAttributeName() = "add"
4376+
}
4377+
4378+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4379+
// existing elements of the receiver become set elements of the return value
4380+
input = "Argument[self].SetElement" and
4381+
output = "ReturnValue.SetElement" and
4382+
preservesValue = true
4383+
or
4384+
// newly added element becomes a set element of the return value
4385+
input = "Argument[0]" and
4386+
output = "ReturnValue.SetElement" and
4387+
preservesValue = true
4388+
or
4389+
// newly added element becomes a set element of the receiver
4390+
input = "Argument[0]" and
4391+
output = "Argument[self].SetElement" and
4392+
preservesValue = true
4393+
or
4394+
// transfer taint from new element to the receiver
4395+
input = "Argument[0]" and
4396+
output = "Argument[self]" and
4397+
preservesValue = false
4398+
or
4399+
// transfer taint from new element to return value
4400+
input = "Argument[0]" and
4401+
output = "ReturnValue" and
4402+
preservesValue = false
4403+
}
4404+
}
41304405
}
41314406

41324407
// ---------------------------------------------------------------------------

python/ql/test/experimental/dataflow/coverage/test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ def test_nested_comprehension_deep_with_local_flow():
192192
def test_nested_comprehension_dict():
193193
d = {"s": [SOURCE]}
194194
x = [y for k, v in d.items() for y in v]
195-
SINK(x[0]) #$ MISSING:flow="SOURCE, l:-2 -> x[0]"
195+
SINK(x[0]) #$ flow="SOURCE, l:-2 -> x[0]"
196196

197197

198198
def test_nested_comprehension_paren():

0 commit comments

Comments
 (0)