@@ -2,45 +2,40 @@ import python
2
2
import Basic
3
3
private import Common
4
4
5
/**
 * An extensible taint kind for a string whose contents are externally controlled.
 *
 * In addition to the flow steps inherited from `StringKind`, a value of this kind
 * propagates taint to a `SequenceNode` that has it as an element, to a `DictNode`
 * that has it as a value, to the result of `json.loads`, and to the results of
 * `urlsplit` and `urlparse`.
 */
abstract class ExternalStringKind extends StringKind {
  bindingset[this]
  ExternalStringKind() { this = this }

  override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
    // Steps already defined for ordinary strings.
    result = StringKind.super.getTaintForFlowStep(fromnode, tonode)
    or
    // The string is an element of a sequence node.
    result.(ExternalStringSequenceKind).getItem() = this and
    fromnode = tonode.(SequenceNode).getElement(_)
    or
    // The string is the argument of `json.loads`.
    result.(ExternalJsonKind).getValue() = this and
    json_load(fromnode, tonode)
    or
    // The string is a value of a dict node.
    result.(ExternalStringDictKind).getValue() = this and
    fromnode = tonode.(DictNode).getAValue()
    or
    // The string is the argument of `urlsplit`.
    result.(ExternalUrlSplitResult).getItem() = this and
    urlsplit(fromnode, tonode)
    or
    // The string is the argument of `urlparse`.
    result.(ExternalUrlParseResult).getItem() = this and
    urlparse(fromnode, tonode)
  }
}
25
27
26
28
/** A taint kind for a sequence whose items carry an `ExternalStringKind` taint. */
class ExternalStringSequenceKind extends SequenceKind {
  ExternalStringSequenceKind() { exists(ExternalStringKind item | item = this.getItem()) }
}
34
32
35
- /** An hierachical dictionary or list where the entire structure is externally controlled
33
+ /**
34
+ * A hierarchical dictionary or list where the entire structure is externally controlled.
36
35
* This is typically a parsed JSON object.
37
36
*/
38
37
class ExternalJsonKind extends TaintKind {
39
-
40
- ExternalJsonKind ( ) {
41
- this = "json[" + any ( ExternalStringKind key ) + "]"
42
- }
43
-
38
+ ExternalJsonKind ( ) { this = "json[" + any ( ExternalStringKind key ) + "]" }
44
39
45
40
/** Gets the taint kind for item in this sequence */
46
41
TaintKind getValue ( ) {
@@ -54,65 +49,225 @@ class ExternalJsonKind extends TaintKind {
54
49
json_subscript_taint ( tonode , fromnode , this , result )
55
50
or
56
51
result = this and copy_call ( fromnode , tonode )
57
- }
52
+ }
58
53
59
54
/** Gets the taint kind of the result of calling method `name`; only `get` is modelled, yielding `getValue()`. */
override TaintKind getTaintOfMethodResult(string name) {
  result = this.getValue() and
  name = "get"
}
63
57
}
64
58
65
59
/** A taint kind for a dictionary whose values carry an `ExternalStringKind` taint. */
class ExternalStringDictKind extends DictKind {
  ExternalStringDictKind() { exists(ExternalStringKind value | value = this.getValue()) }
}
67
63
68
- ExternalStringDictKind ( ) {
69
- this .getValue ( ) instanceof ExternalStringKind
64
/**
 * A taint kind for a dictionary whose values are sequences of externally
 * controlled strings (that is, values of kind `ExternalStringSequenceKind`).
 */
class ExternalStringSequenceDictKind extends DictKind {
  ExternalStringSequenceDictKind() {
    exists(ExternalStringSequenceKind value | value = this.getValue())
  }
}
71
+
72
/**
 * The taint kind of the value returned by `urlsplit(tainted_string)`.
 *
 * See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlsplit
 */
class ExternalUrlSplitResult extends ExternalStringSequenceKind {
  /** Gets the taint of attribute `name`: the inherited taint, or an external string for the URL component attributes. */
  override TaintKind getTaintOfAttribute(string name) {
    result = super.getTaintOfAttribute(name)
    or
    result instanceof ExternalStringKind and
    (
      // fields of the named tuple
      name = "scheme" or
      name = "netloc" or
      name = "path" or
      name = "query" or
      name = "fragment" or
      // additional attributes of the result class
      name = "username" or
      name = "password" or
      name = "hostname"
    )
  }

  /** Gets the taint of the result of method `name`: the inherited taint, or an external string for `geturl`. */
  override TaintKind getTaintOfMethodResult(string name) {
    result = super.getTaintOfMethodResult(name)
    or
    result instanceof ExternalStringKind and
    name = "geturl"
  }
}
73
100
74
- /** A kind of "taint", representing a dictionary mapping strings to sequences of
75
- * tainted strings */
101
/**
 * The taint kind of the value returned by `urlparse(tainted_string)`.
 *
 * See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse
 */
class ExternalUrlParseResult extends ExternalStringSequenceKind {
  /** Gets the taint of attribute `name`: the inherited taint, or an external string for the URL component attributes. */
  override TaintKind getTaintOfAttribute(string name) {
    result = super.getTaintOfAttribute(name)
    or
    result instanceof ExternalStringKind and
    (
      // fields of the named tuple
      name = "scheme" or
      name = "netloc" or
      name = "path" or
      name = "params" or
      name = "query" or
      name = "fragment" or
      // additional attributes of the result class
      name = "username" or
      name = "password" or
      name = "hostname"
    )
  }

  /** Gets the taint of the result of method `name`: the inherited taint, or an external string for `geturl`. */
  override TaintKind getTaintOfMethodResult(string name) {
    result = super.getTaintOfMethodResult(name)
    or
    result instanceof ExternalStringKind and
    name = "geturl"
  }
}
82
130
83
131
/**
 * Holds if `sub` is a subscript load with `sub.getValue()` equal to `obj`, and
 * `key` is the taint kind given by `seq.getValue()`.
 *
 * Helper for the flow-step predicate of `ExternalJsonKind`; kept out of line
 * via `pragma[noinline]`.
 */
pragma[noinline]
private predicate json_subscript_taint(
  SubscriptNode sub, ControlFlowNode obj, ExternalJsonKind seq, TaintKind key
) {
  key = seq.getValue() and
  obj = sub.getValue() and
  sub.isLoad()
}
90
140
91
-
92
141
/** Holds if `tonode` is a call to `json.loads` whose first positional argument is `fromnode`. */
private predicate json_load(ControlFlowNode fromnode, CallNode tonode) {
  fromnode = tonode.getArg(0) and
  tonode = ModuleObject::named("json").attr("loads").(FunctionObject).getACall()
}
98
148
99
- /** A kind of "taint", representing an open file-like object from an external source. */
100
- class ExternalFileObject extends TaintKind {
149
/**
 * Holds if `tonode` is a call to `urlsplit` (Python 2, Python 3, or via
 * `six.moves`) whose first positional argument is `fromnode`.
 */
private predicate urlsplit(ControlFlowNode fromnode, CallNode tonode) {
  // The `six` variant is listed explicitly. Using `exists(FunctionValue ...` instead
  // would make it unnecessary, but then our tests would need to import +100 modules,
  // so for now this slightly altered version gets to live on.
  exists(Value func |
    // Python 3
    func = Value::named("urllib.parse.urlsplit")
    or
    // Python 2
    func = Value::named("urlparse.urlsplit")
    or
    func = Value::named("six.moves.urllib.parse.urlsplit")
  |
    fromnode = tonode.getArg(0) and
    tonode = func.getACall()
  )
}
101
167
102
- ExternalFileObject ( ) {
103
- this = "file[" + any ( ExternalStringKind key ) + "]"
104
- }
168
/**
 * Holds if `tonode` is a call to `urlparse` (Python 2, Python 3, or via
 * `six.moves`) whose first positional argument is `fromnode`.
 */
private predicate urlparse(ControlFlowNode fromnode, CallNode tonode) {
  // The `six` variant is listed explicitly. Using `exists(FunctionValue ...` instead
  // would make it unnecessary, but then our tests would need to import +100 modules,
  // so for now this slightly altered version gets to live on.
  exists(Value func |
    // Python 3
    func = Value::named("urllib.parse.urlparse")
    or
    // Python 2
    func = Value::named("urlparse.urlparse")
    or
    func = Value::named("six.moves.urllib.parse.urlparse")
  |
    fromnode = tonode.getArg(0) and
    tonode = func.getACall()
  )
}
105
186
187
/** A taint kind for an open file-like object that comes from an external source. */
class ExternalFileObject extends TaintKind {
  ExternalFileObject() { exists(ExternalStringKind contents | this = "file[" + contents + "]") }

  /** Gets the taint kind for the contents of this file */
  TaintKind getValue() { "file[" + result + "]" = this }

  /** Gets the taint of the result of method `name`; only `read` is modelled, yielding the file contents. */
  override TaintKind getTaintOfMethodResult(string name) {
    result = this.getValue() and
    name = "read"
  }
}
117
198
199
/**
 * Temporary sanitizer for the tainted result from `urlsplit` and `urlparse`. Can be used to
 * reduce FPs until we have better support for namedtuples.
 *
 * Will clear **all** taint on a test of the kind. That is, on the matching edge of any such
 * test, all fields/indexes will be cleared of taint.
 *
 * Handles:
 * - `if splitres.netloc == "KNOWN_VALUE"`
 * - `if splitres[0] == "KNOWN_VALUE"`
 * - `if splitres.netloc in ["KNOWN_VALUE", ...]`
 * - the `!=` / `not in` forms on the opposite edge, and any of the above wrapped in `not`
 */
class UrlsplitUrlparseTempSanitizer extends Sanitizer {
  // TODO: remove this once we have better support for named tuples
  UrlsplitUrlparseTempSanitizer() { this = "UrlsplitUrlparseTempSanitizer" }

  /**
   * Holds if `taint` is a `urlsplit`/`urlparse` result kind and `test` guards an attribute
   * or subscript of its input variable with a comparison against known string constants.
   */
  override predicate sanitizingEdge(TaintKind taint, PyEdgeRefinement test) {
    (
      taint instanceof ExternalUrlSplitResult
      or
      taint instanceof ExternalUrlParseResult
    ) and
    exists(ControlFlowNode full_use |
      full_use.(SubscriptNode).getObject() = test.getInput().getAUse()
      or
      full_use.(AttrNode).getObject() = test.getInput().getAUse()
    |
      clears_taint(_, full_use, test.getTest(), test.getSense())
    )
  }

  /**
   * Holds if `test`, evaluated with outcome `sense`, establishes that `tainted` is a known
   * constant. `final_test` is the comparison reached after stripping any `not` operators
   * from `test`.
   */
  private predicate clears_taint(
    ControlFlowNode final_test, ControlFlowNode tainted, ControlFlowNode test, boolean sense
  ) {
    // Base case: the comparison must be the test itself. Without this equality `test` is
    // unconstrained, so an unrelated comparison of `tainted` elsewhere in the program
    // would sanitize on any guard.
    final_test = test and
    (
      test_equality_with_const(final_test, tainted, sense)
      or
      test_in_const_seq(final_test, tainted, sense)
    )
    or
    // Recursive case: `not <nested_test>` holds with `sense` iff `nested_test` holds with
    // the opposite sense.
    test.(UnaryExprNode).getNode().getOp() instanceof Not and
    exists(ControlFlowNode nested_test |
      nested_test = test.(UnaryExprNode).getOperand() and
      clears_taint(final_test, tainted, nested_test, sense.booleanNot())
    )
  }

  /** holds for `== "KNOWN_VALUE"` on `true` edge, and `!= "KNOWN_VALUE"` on `false` edge */
  private predicate test_equality_with_const(
    CompareNode cmp, ControlFlowNode tainted, boolean sense
  ) {
    exists(ControlFlowNode const, Cmpop op | const.getNode() instanceof StrConst |
      (
        // The constant may appear on either side of the comparison.
        cmp.operands(const, op, tainted)
        or
        cmp.operands(tainted, op, const)
      ) and
      (
        op instanceof Eq and sense = true
        or
        op instanceof NotEq and sense = false
      )
    )
  }

  /** holds for `in ["KNOWN_VALUE", ...]` on `true` edge, and `not in ["KNOWN_VALUE", ...]` on `false` edge */
  private predicate test_in_const_seq(CompareNode cmp, ControlFlowNode tainted, boolean sense) {
    exists(SequenceNode const_seq, Cmpop op |
      // Every element of the sequence must be a string constant.
      forall(ControlFlowNode elem | elem = const_seq.getAnElement() |
        elem.getNode() instanceof StrConst
      )
    |
      cmp.operands(tainted, op, const_seq) and
      (
        op instanceof In and sense = true
        or
        op instanceof NotIn and sense = false
      )
    )
  }
}
0 commit comments