Skip to content

Commit f5f822c

Browse files
authored
Merge pull request github#13395 from yoff/python/container-summaries-3
2 parents 354ebc2 + 9a1e895 commit f5f822c

File tree

11 files changed

+300
-48
lines changed

11 files changed

+300
-48
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* More precise modelling of several container functions (such as `sorted`, `reversed`) and methods (such as `set.add`, `list.append`).

python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -185,25 +185,6 @@ predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
185185
// longer -- but there needs to be a matching read-step for the store-step, and we
186186
// don't provide that right now.
187187
DataFlowPrivate::comprehensionStoreStep(nodeFrom, _, nodeTo)
188-
or
189-
// functions operating on collections
190-
exists(DataFlow::CallCfgNode call | call = nodeTo |
191-
call = API::builtin(["sorted", "reversed", "iter", "next"]).getACall() and
192-
call.getArg(0) = nodeFrom
193-
)
194-
or
195-
// dict methods
196-
exists(DataFlow::MethodCallNode call, string methodName | call = nodeTo |
197-
methodName in ["values", "items"] and
198-
call.calls(nodeFrom, methodName)
199-
)
200-
or
201-
// list.append, set.add
202-
exists(DataFlow::MethodCallNode call, DataFlow::Node obj |
203-
call.calls(obj, ["append", "add"]) and
204-
obj = nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode() and
205-
call.getArg(0) = nodeFrom
206-
)
207188
}
208189

209190
/**

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 246 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3883,6 +3883,9 @@ private module StdlibPrivate {
38833883
}
38843884
}
38853885

3886+
// ---------------------------------------------------------------------------
3887+
// Flow summaries for functions operating on containers
3888+
// ---------------------------------------------------------------------------
38863889
/** A flow summary for `reversed`. */
38873890
class ReversedSummary extends SummarizedCallable {
38883891
ReversedSummary() { this = "builtins.reversed" }
@@ -3894,9 +3897,114 @@ private module StdlibPrivate {
38943897
}
38953898

38963899
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3897-
input = "Argument[0].ListElement" and
3900+
(
3901+
input = "Argument[0].ListElement"
3902+
or
3903+
input = "Argument[0].SetElement"
3904+
or
3905+
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
3906+
input = "Argument[0].TupleElement[" + i.toString() + "]"
3907+
)
3908+
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
3909+
) and
38983910
output = "ReturnValue.ListElement" and
38993911
preservesValue = true
3912+
or
3913+
input = "Argument[0]" and
3914+
output = "ReturnValue" and
3915+
preservesValue = false
3916+
}
3917+
}
3918+
3919+
/** A flow summary for `sorted`. */
3920+
class SortedSummary extends SummarizedCallable {
3921+
SortedSummary() { this = "builtins.sorted" }
3922+
3923+
override DataFlow::CallCfgNode getACall() { result = API::builtin("sorted").getACall() }
3924+
3925+
override DataFlow::ArgumentNode getACallback() {
3926+
result = API::builtin("sorted").getAValueReachableFromSource()
3927+
}
3928+
3929+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3930+
exists(string content |
3931+
content = "ListElement"
3932+
or
3933+
content = "SetElement"
3934+
or
3935+
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
3936+
content = "TupleElement[" + i.toString() + "]"
3937+
)
3938+
|
3939+
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
3940+
input = "Argument[0]." + content and
3941+
output = "ReturnValue.ListElement" and
3942+
preservesValue = true
3943+
)
3944+
or
3945+
input = "Argument[0]" and
3946+
output = "ReturnValue" and
3947+
preservesValue = false
3948+
}
3949+
}
3950+
3951+
/** A flow summary for `iter`. */
3952+
class IterSummary extends SummarizedCallable {
3953+
IterSummary() { this = "builtins.iter" }
3954+
3955+
override DataFlow::CallCfgNode getACall() { result = API::builtin("iter").getACall() }
3956+
3957+
override DataFlow::ArgumentNode getACallback() {
3958+
result = API::builtin("iter").getAValueReachableFromSource()
3959+
}
3960+
3961+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3962+
(
3963+
input = "Argument[0].ListElement"
3964+
or
3965+
input = "Argument[0].SetElement"
3966+
or
3967+
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
3968+
input = "Argument[0].TupleElement[" + i.toString() + "]"
3969+
)
3970+
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
3971+
) and
3972+
output = "ReturnValue.ListElement" and
3973+
preservesValue = true
3974+
or
3975+
input = "Argument[0]" and
3976+
output = "ReturnValue" and
3977+
preservesValue = false
3978+
}
3979+
}
3980+
3981+
/** A flow summary for `next`. */
3982+
class NextSummary extends SummarizedCallable {
3983+
NextSummary() { this = "builtins.next" }
3984+
3985+
override DataFlow::CallCfgNode getACall() { result = API::builtin("next").getACall() }
3986+
3987+
override DataFlow::ArgumentNode getACallback() {
3988+
result = API::builtin("next").getAValueReachableFromSource()
3989+
}
3990+
3991+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3992+
(
3993+
input = "Argument[0].ListElement"
3994+
or
3995+
input = "Argument[0].SetElement"
3996+
or
3997+
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
3998+
input = "Argument[0].TupleElement[" + i.toString() + "]"
3999+
)
4000+
// TODO: Once we have DictKeyContent, dictionary keys should also flow directly to the return value
4001+
) and
4002+
output = "ReturnValue" and
4003+
preservesValue = true
4004+
or
4005+
input = "Argument[1]" and
4006+
output = "ReturnValue" and
4007+
preservesValue = true
39004008
}
39014009
}
39024010

@@ -4127,6 +4235,143 @@ private module StdlibPrivate {
41274235
preservesValue = true
41284236
}
41294237
}
4238+
4239+
/**
4240+
* A flow summary for `dict.values`.
4241+
*
4242+
* See https://docs.python.org/3.10/library/stdtypes.html#dict.values
4243+
*/
4244+
class DictValues extends SummarizedCallable {
4245+
DictValues() { this = "dict.values" }
4246+
4247+
override DataFlow::CallCfgNode getACall() {
4248+
result.(DataFlow::MethodCallNode).calls(_, "values")
4249+
}
4250+
4251+
override DataFlow::ArgumentNode getACallback() {
4252+
result.(DataFlow::AttrRead).getAttributeName() = "values"
4253+
}
4254+
4255+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4256+
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
4257+
input = "Argument[self].DictionaryElement[" + key + "]" and
4258+
output = "ReturnValue.ListElement" and
4259+
preservesValue = true
4260+
)
4261+
or
4262+
input = "Argument[self]" and
4263+
output = "ReturnValue" and
4264+
preservesValue = false
4265+
}
4266+
}
4267+
4268+
/**
4269+
* A flow summary for `dict.keys`.
4270+
*
4271+
* See https://docs.python.org/3.10/library/stdtypes.html#dict.keys
4272+
*/
4273+
class DictKeys extends SummarizedCallable {
4274+
DictKeys() { this = "dict.keys" }
4275+
4276+
override DataFlow::CallCfgNode getACall() { result.(DataFlow::MethodCallNode).calls(_, "keys") }
4277+
4278+
override DataFlow::ArgumentNode getACallback() {
4279+
result.(DataFlow::AttrRead).getAttributeName() = "keys"
4280+
}
4281+
4282+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4283+
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
4284+
input = "Argument[self]" and
4285+
output = "ReturnValue" and
4286+
preservesValue = false
4287+
}
4288+
}
4289+
4290+
/**
4291+
* A flow summary for `dict.items`.
4292+
*
4293+
* See https://docs.python.org/3.10/library/stdtypes.html#dict.items
4294+
*/
4295+
class DictItems extends SummarizedCallable {
4296+
DictItems() { this = "dict.items" }
4297+
4298+
override DataFlow::CallCfgNode getACall() {
4299+
result.(DataFlow::MethodCallNode).calls(_, "items")
4300+
}
4301+
4302+
override DataFlow::ArgumentNode getACallback() {
4303+
result.(DataFlow::AttrRead).getAttributeName() = "items"
4304+
}
4305+
4306+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4307+
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
4308+
input = "Argument[self].DictionaryElement[" + key + "]" and
4309+
output = "ReturnValue.ListElement.TupleElement[1]" and
4310+
preservesValue = true
4311+
)
4312+
or
4313+
// TODO: Also add the keys to the output list (as `TupleElement[0]` of each item)
4314+
input = "Argument[self]" and
4315+
output = "ReturnValue" and
4316+
preservesValue = false
4317+
}
4318+
}
4319+
4320+
/**
4321+
* A flow summary for `list.append`.
4322+
*
4323+
* See https://docs.python.org/3.10/library/stdtypes.html#typesseq-mutable
4324+
*/
4325+
class ListAppend extends SummarizedCallable {
4326+
ListAppend() { this = "list.append" }
4327+
4328+
override DataFlow::CallCfgNode getACall() {
4329+
result.(DataFlow::MethodCallNode).calls(_, "append")
4330+
}
4331+
4332+
override DataFlow::ArgumentNode getACallback() {
4333+
result.(DataFlow::AttrRead).getAttributeName() = "append"
4334+
}
4335+
4336+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4337+
// the newly added element is stored as a list element of `self`
4338+
input = "Argument[0]" and
4339+
output = "Argument[self].ListElement" and
4340+
preservesValue = true
4341+
or
4342+
// also transfer taint from the new element to the list itself (TODO: remove in future when taint-handling is more in line with other languages)
4343+
input = "Argument[0]" and
4344+
output = "Argument[self]" and
4345+
preservesValue = false
4346+
}
4347+
}
4348+
4349+
/**
4350+
* A flow summary for `set.add`.
4351+
*
4352+
* See https://docs.python.org/3.10/library/stdtypes.html#frozenset.add
4353+
*/
4354+
class SetAdd extends SummarizedCallable {
4355+
SetAdd() { this = "set.add" }
4356+
4357+
override DataFlow::CallCfgNode getACall() { result.(DataFlow::MethodCallNode).calls(_, "add") }
4358+
4359+
override DataFlow::ArgumentNode getACallback() {
4360+
result.(DataFlow::AttrRead).getAttributeName() = "add"
4361+
}
4362+
4363+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
4364+
// the newly added element is stored as a set element of `self`
4365+
input = "Argument[0]" and
4366+
output = "Argument[self].SetElement" and
4367+
preservesValue = true
4368+
or
4369+
// also transfer taint from the new element to the set itself (TODO: remove in future when taint-handling is more in line with other languages)
4370+
input = "Argument[0]" and
4371+
output = "Argument[self]" and
4372+
preservesValue = false
4373+
}
4374+
}
41304375
}
41314376

41324377
// ---------------------------------------------------------------------------

python/ql/test/experimental/dataflow/coverage/test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ def test_nested_comprehension_deep_with_local_flow():
192192
def test_nested_comprehension_dict():
193193
d = {"s": [SOURCE]}
194194
x = [y for k, v in d.items() for y in v]
195-
SINK(x[0]) #$ MISSING:flow="SOURCE, l:-2 -> x[0]"
195+
SINK(x[0]) #$ flow="SOURCE, l:-2 -> x[0]"
196196

197197

198198
def test_nested_comprehension_paren():

0 commit comments

Comments
 (0)