Skip to content

Commit 1c6d643

Browse files
authored
Merge pull request github#13146 from yoff/python/container-summaries-1
Python: Container summaries, part 1
2 parents 06d5a7f + 5d68473 commit 1c6d643

25 files changed

+189
-67
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/TaintTrackingPrivate.qll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
private import python
22
private import semmle.python.dataflow.new.DataFlow
33
private import semmle.python.dataflow.new.internal.DataFlowPrivate as DataFlowPrivate
4+
private import FlowSummaryImpl as FlowSummaryImpl
45
private import semmle.python.dataflow.new.internal.TaintTrackingPublic
56
private import semmle.python.ApiGraphs
67

@@ -55,6 +56,8 @@ private module Cached {
5556
awaitStep(nodeFrom, nodeTo)
5657
or
5758
asyncWithStep(nodeFrom, nodeTo)
59+
or
60+
FlowSummaryImpl::Private::Steps::summaryLocalStep(nodeFrom, nodeTo, false)
5861
}
5962
}
6063

@@ -159,7 +162,7 @@ predicate stringManipulation(DataFlow::CfgNode nodeFrom, DataFlow::CfgNode nodeT
159162
* is currently very imprecise, as an example, since we model `dict.get`, we treat any
160163
* `<tainted object>.get(<arg>)` as tainted, whether it's true or not.
161164
*/
162-
predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
165+
predicate containerStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
163166
// construction by literal
164167
//
165168
// TODO: once we have proper flow-summary modeling, we might not need this step any
@@ -181,13 +184,6 @@ predicate containerStep(DataFlow::CfgNode nodeFrom, DataFlow::Node nodeTo) {
181184
// don't provide that right now.
182185
DataFlowPrivate::comprehensionStoreStep(nodeFrom, _, nodeTo)
183186
or
184-
// constructor call
185-
exists(DataFlow::CallCfgNode call | call = nodeTo |
186-
call = API::builtin(["list", "set", "frozenset", "dict", "tuple"]).getACall() and
187-
call.getArg(0) = nodeFrom
188-
// TODO: Properly handle defaultdict/namedtuple
189-
)
190-
or
191187
// functions operating on collections
192188
exists(DataFlow::CallCfgNode call | call = nodeTo |
193189
call = API::builtin(["sorted", "reversed", "iter", "next"]).getACall() and

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3790,6 +3790,138 @@ private module StdlibPrivate {
37903790
override DataFlow::Node getAPathArgument() { result = this.getAnInput() }
37913791
}
37923792

3793+
// ---------------------------------------------------------------------------
3794+
// Flow summaries for functions constructing containers
3795+
// ---------------------------------------------------------------------------
3796+
/** A flow summary for `dict`. */
3797+
class DictSummary extends SummarizedCallable {
3798+
DictSummary() { this = "builtins.dict" }
3799+
3800+
override DataFlow::CallCfgNode getACall() { result = API::builtin("dict").getACall() }
3801+
3802+
override DataFlow::ArgumentNode getACallback() {
3803+
result = API::builtin("dict").getAValueReachableFromSource()
3804+
}
3805+
3806+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3807+
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
3808+
input = "Argument[0].DictionaryElement[" + key + "]" and
3809+
output = "ReturnValue.DictionaryElement[" + key + "]" and
3810+
preservesValue = true
3811+
)
3812+
or
3813+
exists(DataFlow::DictionaryElementContent dc, string key | key = dc.getKey() |
3814+
input = "Argument[" + key + ":]" and
3815+
output = "ReturnValue.DictionaryElement[" + key + "]" and
3816+
preservesValue = true
3817+
)
3818+
or
3819+
input = "Argument[0]" and
3820+
output = "ReturnValue" and
3821+
preservesValue = false
3822+
}
3823+
}
3824+
3825+
/** A flow summary for `list`. */
3826+
class ListSummary extends SummarizedCallable {
3827+
ListSummary() { this = "builtins.list" }
3828+
3829+
override DataFlow::CallCfgNode getACall() { result = API::builtin("list").getACall() }
3830+
3831+
override DataFlow::ArgumentNode getACallback() {
3832+
result = API::builtin("list").getAValueReachableFromSource()
3833+
}
3834+
3835+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3836+
(
3837+
input = "Argument[0].ListElement"
3838+
or
3839+
input = "Argument[0].SetElement"
3840+
or
3841+
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
3842+
input = "Argument[0].TupleElement[" + i.toString() + "]"
3843+
)
3844+
// TODO: Once we have DictKeyContent, we need to transform that into ListElementContent
3845+
) and
3846+
output = "ReturnValue.ListElement" and
3847+
preservesValue = true
3848+
or
3849+
input = "Argument[0]" and
3850+
output = "ReturnValue" and
3851+
preservesValue = false
3852+
}
3853+
}
3854+
3855+
/** A flow summary for `tuple`. */
3856+
class TupleSummary extends SummarizedCallable {
3857+
TupleSummary() { this = "builtins.tuple" }
3858+
3859+
override DataFlow::CallCfgNode getACall() { result = API::builtin("tuple").getACall() }
3860+
3861+
override DataFlow::ArgumentNode getACallback() {
3862+
result = API::builtin("tuple").getAValueReachableFromSource()
3863+
}
3864+
3865+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3866+
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
3867+
input = "Argument[0].TupleElement[" + i.toString() + "]" and
3868+
output = "ReturnValue.TupleElement[" + i.toString() + "]" and
3869+
preservesValue = true
3870+
)
3871+
or
3872+
// TODO: We need to also translate iterable content such as list element
3873+
// but we currently lack TupleElementAny
3874+
input = "Argument[0]" and
3875+
output = "ReturnValue" and
3876+
preservesValue = false
3877+
}
3878+
}
3879+
3880+
/** A flow summary for `set`. */
3881+
class SetSummary extends SummarizedCallable {
3882+
SetSummary() { this = "builtins.set" }
3883+
3884+
override DataFlow::CallCfgNode getACall() { result = API::builtin("set").getACall() }
3885+
3886+
override DataFlow::ArgumentNode getACallback() {
3887+
result = API::builtin("set").getAValueReachableFromSource()
3888+
}
3889+
3890+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3891+
(
3892+
input = "Argument[0].ListElement"
3893+
or
3894+
input = "Argument[0].SetElement"
3895+
or
3896+
exists(DataFlow::TupleElementContent tc, int i | i = tc.getIndex() |
3897+
input = "Argument[0].TupleElement[" + i.toString() + "]"
3898+
)
3899+
// TODO: Once we have DictKeyContent, we need to transform that into SetElementContent
3900+
) and
3901+
output = "ReturnValue.SetElement" and
3902+
preservesValue = true
3903+
or
3904+
input = "Argument[0]" and
3905+
output = "ReturnValue" and
3906+
preservesValue = false
3907+
}
3908+
}
3909+
3910+
/** A flow summary for `frozenset`. */
3911+
class FrozensetSummary extends SummarizedCallable {
3912+
FrozensetSummary() { this = "builtins.frozenset" }
3913+
3914+
override DataFlow::CallCfgNode getACall() { result = API::builtin("frozenset").getACall() }
3915+
3916+
override DataFlow::ArgumentNode getACallback() {
3917+
result = API::builtin("frozenset").getAValueReachableFromSource()
3918+
}
3919+
3920+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
3921+
any(SetSummary s).propagatesFlowExt(input, output, preservesValue)
3922+
}
3923+
}
3924+
37933925
/** A flow summary for `reversed`. */
37943926
class ReversedSummary extends SummarizedCallable {
37953927
ReversedSummary() { this = "builtins.reversed" }
Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import experimental.dataflow.callGraphConfig
22

33
from DataFlow::Node source, DataFlow::Node sink
4-
where exists(CallGraphConfig cfg | cfg.hasFlow(source, sink))
4+
where
5+
exists(CallGraphConfig cfg | cfg.hasFlow(source, sink)) and
6+
exists(source.getLocation().getFile().getRelativePath()) and
7+
exists(sink.getLocation().getFile().getRelativePath())
58
select source, sink
Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
| file://:0:0:0:0 | parameter position 0 of builtins.reversed |
2-
| file://:0:0:0:0 | parameter position 1 of dict.setdefault |
31
| test.py:1:1:1:21 | SynthDictSplatParameterNode |
42
| test.py:1:19:1:19 | ControlFlowNode for x |
53
| test.py:7:5:7:20 | ControlFlowNode for obfuscated_id() |
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import experimental.dataflow.callGraphConfig
22

33
from DataFlow::Node sink
4-
where exists(CallGraphConfig cfg | cfg.isSink(sink))
4+
where
5+
exists(CallGraphConfig cfg | cfg.isSink(sink)) and
6+
exists(sink.getLocation().getFile().getRelativePath())
57
select sink
Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,2 @@
1-
| file://:0:0:0:0 | [summary] to write: return (return) in builtins.reversed |
2-
| file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
31
| test.py:4:10:4:10 | ControlFlowNode for z |
42
| test.py:7:19:7:19 | ControlFlowNode for a |
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import experimental.dataflow.callGraphConfig
22

33
from DataFlow::Node source
4-
where exists(CallGraphConfig cfg | cfg.isSource(source))
4+
where
5+
exists(CallGraphConfig cfg | cfg.isSource(source)) and
6+
exists(source.getLocation().getFile().getRelativePath())
57
select source

python/ql/test/experimental/dataflow/basic/global.expected

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
2-
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
31
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
42
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |
53
| test.py:1:5:1:17 | GSSA Variable obfuscated_id | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |

python/ql/test/experimental/dataflow/basic/global.ql

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,7 @@ import allFlowsConfig
33
from DataFlow::Node source, DataFlow::Node sink
44
where
55
source != sink and
6-
exists(AllFlowsConfig cfg | cfg.hasFlow(source, sink))
6+
exists(AllFlowsConfig cfg | cfg.hasFlow(source, sink)) and
7+
exists(source.getLocation().getFile().getRelativePath()) and
8+
exists(sink.getLocation().getFile().getRelativePath())
79
select source, sink

python/ql/test/experimental/dataflow/basic/globalStep.expected

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
| file://:0:0:0:0 | [summary] read: argument position 0.List element in builtins.reversed | file://:0:0:0:0 | [summary] to write: return (return).List element in builtins.reversed |
2-
| file://:0:0:0:0 | parameter position 1 of dict.setdefault | file://:0:0:0:0 | [summary] to write: return (return) in dict.setdefault |
31
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
42
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:1:5:1:17 | GSSA Variable obfuscated_id |
53
| test.py:1:1:1:21 | ControlFlowNode for FunctionExpr | test.py:7:5:7:17 | ControlFlowNode for obfuscated_id |

0 commit comments

Comments
 (0)