Skip to content

Commit 8bd3063

Browse files
authored
Merge pull request github#2875 from RasmusWL/python-taint-urlsplit
Python: Add taint for urlsplit
2 parents b25a461 + 771dfec commit 8bd3063

File tree

16 files changed

+478
-183
lines changed

16 files changed

+478
-183
lines changed

python/ql/src/semmle/python/security/strings/External.qll

Lines changed: 196 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -2,45 +2,40 @@ import python
22
import Basic
33
private import Common
44

5-
/** An extensible kind of taint representing an externally controlled string.
5+
/**
6+
* An extensible kind of taint representing an externally controlled string.
67
*/
78
abstract class ExternalStringKind extends StringKind {
8-
99
bindingset[this]
10-
ExternalStringKind() {
11-
this = this
12-
}
10+
ExternalStringKind() { this = this }
1311

1412
override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
1513
result = StringKind.super.getTaintForFlowStep(fromnode, tonode)
1614
or
17-
tonode.(SequenceNode).getElement(_) = fromnode and result.(ExternalStringSequenceKind).getItem() = this
15+
tonode.(SequenceNode).getElement(_) = fromnode and
16+
result.(ExternalStringSequenceKind).getItem() = this
1817
or
1918
json_load(fromnode, tonode) and result.(ExternalJsonKind).getValue() = this
2019
or
2120
tonode.(DictNode).getAValue() = fromnode and result.(ExternalStringDictKind).getValue() = this
21+
or
22+
urlsplit(fromnode, tonode) and result.(ExternalUrlSplitResult).getItem() = this
23+
or
24+
urlparse(fromnode, tonode) and result.(ExternalUrlParseResult).getItem() = this
2225
}
23-
2426
}
2527

2628
/** A kind of "taint", representing a sequence, with a "taint" member */
2729
class ExternalStringSequenceKind extends SequenceKind {
28-
29-
ExternalStringSequenceKind() {
30-
this.getItem() instanceof ExternalStringKind
31-
}
32-
30+
ExternalStringSequenceKind() { this.getItem() instanceof ExternalStringKind }
3331
}
3432

35-
/** An hierachical dictionary or list where the entire structure is externally controlled
33+
/**
34+
* An hierachical dictionary or list where the entire structure is externally controlled
3635
* This is typically a parsed JSON object.
3736
*/
3837
class ExternalJsonKind extends TaintKind {
39-
40-
ExternalJsonKind() {
41-
this = "json[" + any(ExternalStringKind key) + "]"
42-
}
43-
38+
ExternalJsonKind() { this = "json[" + any(ExternalStringKind key) + "]" }
4439

4540
/** Gets the taint kind for item in this sequence */
4641
TaintKind getValue() {
@@ -54,65 +49,225 @@ class ExternalJsonKind extends TaintKind {
5449
json_subscript_taint(tonode, fromnode, this, result)
5550
or
5651
result = this and copy_call(fromnode, tonode)
57-
}
52+
}
5853

5954
override TaintKind getTaintOfMethodResult(string name) {
6055
name = "get" and result = this.getValue()
61-
}
62-
56+
}
6357
}
6458

6559
/** A kind of "taint", representing a dictionary mapping str->"taint" */
6660
class ExternalStringDictKind extends DictKind {
61+
ExternalStringDictKind() { this.getValue() instanceof ExternalStringKind }
62+
}
6763

68-
ExternalStringDictKind() {
69-
this.getValue() instanceof ExternalStringKind
64+
/**
65+
* A kind of "taint", representing a dictionary mapping strings to sequences of
66+
* tainted strings
67+
*/
68+
class ExternalStringSequenceDictKind extends DictKind {
69+
ExternalStringSequenceDictKind() { this.getValue() instanceof ExternalStringSequenceKind }
70+
}
71+
72+
/** TaintKind for the result of `urlsplit(tainted_string)` */
73+
class ExternalUrlSplitResult extends ExternalStringSequenceKind {
74+
// https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlsplit
75+
override TaintKind getTaintOfAttribute(string name) {
76+
result = super.getTaintOfAttribute(name)
77+
or
78+
(
79+
// namedtuple field names
80+
name = "scheme" or
81+
name = "netloc" or
82+
name = "path" or
83+
name = "query" or
84+
name = "fragment" or
85+
// class methods
86+
name = "username" or
87+
name = "password" or
88+
name = "hostname"
89+
) and
90+
result instanceof ExternalStringKind
7091
}
7192

93+
override TaintKind getTaintOfMethodResult(string name) {
94+
result = super.getTaintOfMethodResult(name)
95+
or
96+
name = "geturl" and
97+
result instanceof ExternalStringKind
98+
}
7299
}
73100

74-
/** A kind of "taint", representing a dictionary mapping strings to sequences of
75-
* tainted strings */
101+
/** TaintKind for the result of `urlparse(tainted_string)` */
102+
class ExternalUrlParseResult extends ExternalStringSequenceKind {
103+
// https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse
104+
override TaintKind getTaintOfAttribute(string name) {
105+
result = super.getTaintOfAttribute(name)
106+
or
107+
(
108+
// namedtuple field names
109+
name = "scheme" or
110+
name = "netloc" or
111+
name = "path" or
112+
name = "params" or
113+
name = "query" or
114+
name = "fragment" or
115+
// class methods
116+
name = "username" or
117+
name = "password" or
118+
name = "hostname"
119+
) and
120+
result instanceof ExternalStringKind
121+
}
76122

77-
class ExternalStringSequenceDictKind extends DictKind {
78-
ExternalStringSequenceDictKind() {
79-
this.getValue() instanceof ExternalStringSequenceKind
123+
override TaintKind getTaintOfMethodResult(string name) {
124+
result = super.getTaintOfMethodResult(name)
125+
or
126+
name = "geturl" and
127+
result instanceof ExternalStringKind
80128
}
81129
}
82130

83131
/* Helper for getTaintForStep() */
84-
pragma [noinline]
85-
private predicate json_subscript_taint(SubscriptNode sub, ControlFlowNode obj, ExternalJsonKind seq, TaintKind key) {
132+
pragma[noinline]
133+
private predicate json_subscript_taint(
134+
SubscriptNode sub, ControlFlowNode obj, ExternalJsonKind seq, TaintKind key
135+
) {
86136
sub.isLoad() and
87137
sub.getValue() = obj and
88138
key = seq.getValue()
89139
}
90140

91-
92141
private predicate json_load(ControlFlowNode fromnode, CallNode tonode) {
93142
exists(FunctionObject json_loads |
94143
ModuleObject::named("json").attr("loads") = json_loads and
95-
json_loads.getACall() = tonode and tonode.getArg(0) = fromnode
144+
json_loads.getACall() = tonode and
145+
tonode.getArg(0) = fromnode
96146
)
97147
}
98148

99-
/** A kind of "taint", representing an open file-like object from an external source. */
100-
class ExternalFileObject extends TaintKind {
149+
private predicate urlsplit(ControlFlowNode fromnode, CallNode tonode) {
150+
// This could be implemented as `exists(FunctionValue` without the explicit six part,
151+
// but then our tests will need to import +100 modules, so for now this slightly
152+
// altered version gets to live on.
153+
exists(Value urlsplit |
154+
(
155+
urlsplit = Value::named("six.moves.urllib.parse.urlsplit")
156+
or
157+
// Python 2
158+
urlsplit = Value::named("urlparse.urlsplit")
159+
or
160+
// Python 3
161+
urlsplit = Value::named("urllib.parse.urlsplit")
162+
) and
163+
tonode = urlsplit.getACall() and
164+
tonode.getArg(0) = fromnode
165+
)
166+
}
101167

102-
ExternalFileObject() {
103-
this = "file[" + any(ExternalStringKind key) + "]"
104-
}
168+
private predicate urlparse(ControlFlowNode fromnode, CallNode tonode) {
169+
// This could be implemented as `exists(FunctionValue` without the explicit six part,
170+
// but then our tests will need to import +100 modules, so for now this slightly
171+
// altered version gets to live on.
172+
exists(Value urlparse |
173+
(
174+
urlparse = Value::named("six.moves.urllib.parse.urlparse")
175+
or
176+
// Python 2
177+
urlparse = Value::named("urlparse.urlparse")
178+
or
179+
// Python 3
180+
urlparse = Value::named("urllib.parse.urlparse")
181+
) and
182+
tonode = urlparse.getACall() and
183+
tonode.getArg(0) = fromnode
184+
)
185+
}
105186

187+
/** A kind of "taint", representing an open file-like object from an external source. */
188+
class ExternalFileObject extends TaintKind {
189+
ExternalFileObject() { this = "file[" + any(ExternalStringKind key) + "]" }
106190

107191
/** Gets the taint kind for the contents of this file */
108-
TaintKind getValue() {
109-
this = "file[" + result + "]"
110-
}
192+
TaintKind getValue() { this = "file[" + result + "]" }
111193

112194
override TaintKind getTaintOfMethodResult(string name) {
113195
name = "read" and result = this.getValue()
114196
}
115-
116197
}
117198

199+
/**
200+
* Temporary sanitizer for the tainted result from `urlsplit` and `urlparse`. Can be used to reduce FPs until
201+
* we have better support for namedtuples.
202+
*
203+
* Will clear **all** taint on a test of the kind. That is, on the true edge of any matching test,
204+
* all fields/indexes will be cleared of taint.
205+
*
206+
* Handles:
207+
* - `if splitres.netloc == "KNOWN_VALUE"`
208+
* - `if splitres[0] == "KNOWN_VALUE"`
209+
*/
210+
class UrlsplitUrlparseTempSanitizer extends Sanitizer {
211+
// TODO: remove this once we have better support for named tuples
212+
213+
UrlsplitUrlparseTempSanitizer() { this = "UrlsplitUrlparseTempSanitizer" }
118214

215+
override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
216+
(
217+
taint instanceof ExternalUrlSplitResult
218+
or
219+
taint instanceof ExternalUrlParseResult
220+
) and
221+
exists(ControlFlowNode full_use |
222+
full_use.(SubscriptNode).getObject() = test.getInput().getAUse()
223+
or
224+
full_use.(AttrNode).getObject() = test.getInput().getAUse()
225+
|
226+
clears_taint(_, full_use, test.getTest(), test.getSense())
227+
)
228+
}
229+
230+
private predicate clears_taint(ControlFlowNode final_test, ControlFlowNode tainted, ControlFlowNode test, boolean sense) {
231+
test_equality_with_const(final_test, tainted, sense)
232+
or
233+
test_in_const_seq(final_test, tainted, sense)
234+
or
235+
test.(UnaryExprNode).getNode().getOp() instanceof Not and
236+
exists(ControlFlowNode nested_test |
237+
nested_test = test.(UnaryExprNode).getOperand() and
238+
clears_taint(final_test, tainted, nested_test, sense.booleanNot())
239+
)
240+
}
241+
242+
/** holds for `== "KNOWN_VALUE"` on `true` edge, and `!= "KNOWN_VALUE"` on `false` edge */
243+
private predicate test_equality_with_const(CompareNode cmp, ControlFlowNode tainted, boolean sense) {
244+
exists(ControlFlowNode const, Cmpop op |
245+
const.getNode() instanceof StrConst
246+
|
247+
(
248+
cmp.operands(const, op, tainted)
249+
or
250+
cmp.operands(tainted, op, const)
251+
) and
252+
(
253+
op instanceof Eq and sense = true
254+
or
255+
op instanceof NotEq and sense = false
256+
)
257+
)
258+
}
259+
260+
/** holds for `in ["KNOWN_VALUE", ...]` on `true` edge, and `not in ["KNOWN_VALUE", ...]` on `false` edge */
261+
private predicate test_in_const_seq(CompareNode cmp, ControlFlowNode tainted, boolean sense) {
262+
exists(SequenceNode const_seq, Cmpop op |
263+
forall(ControlFlowNode elem | elem = const_seq.getAnElement() | elem.getNode() instanceof StrConst)
264+
|
265+
cmp.operands(tainted, op, const_seq) and
266+
(
267+
op instanceof In and sense = true
268+
or
269+
op instanceof NotIn and sense = false
270+
)
271+
)
272+
}
273+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
| UrlsplitUrlparseTempSanitizer | [externally controlled string] | test.py:21 | Pi(urlsplit_res_0) [true] |
2+
| UrlsplitUrlparseTempSanitizer | [externally controlled string] | test.py:24 | Pi(urlsplit_res_3) [true] |
3+
| UrlsplitUrlparseTempSanitizer | [externally controlled string] | test.py:27 | Pi(urlsplit_res_6) [true] |
4+
| UrlsplitUrlparseTempSanitizer | [externally controlled string] | test.py:30 | Pi(urlsplit_res_9) [true] |
5+
| string equality sanitizer | externally controlled string | test.py:21 | Pi(urlsplit_res_0) [true] |
6+
| string equality sanitizer | externally controlled string | test.py:24 | Pi(urlsplit_res_3) [true] |
7+
| string equality sanitizer | externally controlled string | test.py:27 | Pi(urlsplit_res_6) [true] |
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import python
2+
import Taint
3+
4+
from Sanitizer s, TaintKind taint, PyEdgeRefinement test
5+
where s.sanitizingEdge(taint, test)
6+
select s, taint, test.getTest().getLocation().toString(), test.getRepresentation()
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import python
2+
import semmle.python.security.TaintTracking
3+
import semmle.python.security.strings.Untrusted
4+
5+
class SimpleSource extends TaintSource {
6+
SimpleSource() { this.(NameNode).getId() = "TAINTED_STRING" }
7+
8+
override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringKind }
9+
10+
override string toString() { result = "taint source" }
11+
}
12+
13+
class ListSource extends TaintSource {
14+
ListSource() { this.(NameNode).getId() = "TAINTED_LIST" }
15+
16+
override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringSequenceKind }
17+
18+
override string toString() { result = "list taint source" }
19+
}
20+
21+
class DictSource extends TaintSource {
22+
DictSource() { this.(NameNode).getId() = "TAINTED_DICT" }
23+
24+
override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringDictKind }
25+
26+
override string toString() { result = "dict taint source" }
27+
}
28+
29+
class TestConfig extends TaintTracking::Configuration {
30+
TestConfig() { this = "TestConfig" }
31+
32+
override predicate isSanitizer(Sanitizer sanitizer) {
33+
sanitizer instanceof UrlsplitUrlparseTempSanitizer
34+
}
35+
36+
override predicate isSource(TaintTracking::Source source) {
37+
source instanceof SimpleSource
38+
or
39+
source instanceof ListSource
40+
or
41+
source instanceof DictSource
42+
}
43+
44+
override predicate isSink(TaintTracking::Sink sink) {
45+
none()
46+
}
47+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
| test.py:13 | test_basic | a | externally controlled string |
2+
| test.py:13 | test_basic | b | externally controlled string |
3+
| test.py:13 | test_basic | c | externally controlled string |
4+
| test.py:13 | test_basic | d | externally controlled string |
5+
| test.py:13 | test_basic | urlsplit_res | [externally controlled string] |
6+
| test.py:19 | test_sanitizer | Attribute | externally controlled string |
7+
| test.py:22 | test_sanitizer | Attribute | NO TAINT |
8+
| test.py:25 | test_sanitizer | Subscript | NO TAINT |
9+
| test.py:28 | test_sanitizer | Attribute | NO TAINT |
10+
| test.py:31 | test_sanitizer | Attribute | NO TAINT |
11+
| test.py:34 | test_sanitizer | Attribute | externally controlled string |
12+
| test.py:44 | test_namedtuple | a | NO TAINT |
13+
| test.py:44 | test_namedtuple | b | NO TAINT |
14+
| test.py:44 | test_namedtuple | c | NO TAINT |
15+
| test.py:44 | test_namedtuple | d | NO TAINT |

0 commit comments

Comments
 (0)