Skip to content

Commit 6b84137

Browse files
committed
Python: Model cgi.FieldStorage (parsing of submitted forms)
1 parent 6eb2401 commit 6b84137

File tree

3 files changed

+146
-13
lines changed

3 files changed

+146
-13
lines changed

python/ql/src/semmle/python/web/stdlib/Request.qll

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,67 @@ class HTTPMessageKind extends ExternalStringDictKind {
5555
)
5656
}
5757
}
58+
59+
/** Source of parsed HTTP forms (by using the `cgi` module). */
60+
class CgiFieldStorageSource extends HttpRequestTaintSource {
61+
CgiFieldStorageSource() { this = Value::named("cgi.FieldStorage").getACall() }
62+
63+
override predicate isSourceOf(TaintKind kind) { kind instanceof CgiFieldStorageFormKind }
64+
}
65+
66+
/** TaintKind for a parsed HTTP form. */
67+
class CgiFieldStorageFormKind extends TaintKind {
68+
/*
69+
* There is a slight difference between how we model forms/fields and how they are handled by the code.
70+
* In the code
71+
* ```
72+
* form = cgi.FieldStorage()
73+
* field = form['myfield']
74+
* ```
75+
* both `form` and `field` have the type `cgi.FieldStorage`. This allows the code to represent
76+
* nested forms as `form['nested_form']['myfield']`. However, since HTML forms can't be nested
77+
* we ignore that detail, which allows for cleaner modeling.
78+
*/
79+
CgiFieldStorageFormKind() { this = "CgiFieldStorageFormKind" }
80+
81+
override TaintKind getTaintOfAttribute(string name) {
82+
name = "value" and result.(SequenceKind).getItem() instanceof CgiFieldStorageFieldKind
83+
}
84+
85+
override TaintKind getTaintOfMethodResult(string name) {
86+
name = "getvalue" and
87+
(
88+
result instanceof ExternalStringKind
89+
or
90+
result.(SequenceKind).getItem() instanceof ExternalStringKind
91+
)
92+
or
93+
name = "getfirst" and
94+
result instanceof ExternalStringKind
95+
or
96+
name = "getlist" and
97+
result.(SequenceKind).getItem() instanceof ExternalStringKind
98+
}
99+
100+
override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
101+
tonode.(SubscriptNode).getObject() = fromnode and
102+
(
103+
result instanceof CgiFieldStorageFieldKind
104+
or
105+
result.(SequenceKind).getItem() instanceof CgiFieldStorageFieldKind
106+
)
107+
}
108+
}
109+
110+
/** TaintKind for the field of a parsed HTTP form. */
111+
class CgiFieldStorageFieldKind extends TaintKind {
112+
CgiFieldStorageFieldKind() { this = "CgiFieldStorageFieldKind" }
113+
114+
override TaintKind getTaintOfAttribute(string name) {
115+
name = "filename" and result instanceof ExternalStringKind
116+
or
117+
name = "file" and result instanceof ExternalFileObject
118+
or
119+
name = "value" and result instanceof ExternalStringKind
120+
}
121+
}
Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,32 @@
1-
| test.py:16 | ok | taint_sources | self | BaseHTTPRequestHandlerKind |
2-
| test.py:18 | ok | taint_sources | Attribute | externally controlled string |
1+
| test.py:18 | ok | taint_sources | self | BaseHTTPRequestHandlerKind |
32
| test.py:20 | ok | taint_sources | Attribute | externally controlled string |
4-
| test.py:22 | ok | taint_sources | Attribute | {externally controlled string} |
5-
| test.py:23 | ok | taint_sources | Subscript | externally controlled string |
6-
| test.py:24 | ok | taint_sources | Attribute() | externally controlled string |
7-
| test.py:25 | ok | taint_sources | Attribute() | [externally controlled string] |
8-
| test.py:26 | fail | taint_sources | Attribute() | <NO TAINT> |
3+
| test.py:22 | ok | taint_sources | Attribute | externally controlled string |
4+
| test.py:24 | ok | taint_sources | Attribute | {externally controlled string} |
5+
| test.py:25 | ok | taint_sources | Subscript | externally controlled string |
6+
| test.py:26 | ok | taint_sources | Attribute() | externally controlled string |
97
| test.py:27 | ok | taint_sources | Attribute() | [externally controlled string] |
108
| test.py:28 | fail | taint_sources | Attribute() | <NO TAINT> |
11-
| test.py:29 | ok | taint_sources | Attribute() | externally controlled string |
12-
| test.py:30 | ok | taint_sources | Attribute() | externally controlled string |
13-
| test.py:31 | ok | taint_sources | str() | externally controlled string |
14-
| test.py:32 | ok | taint_sources | bytes() | externally controlled string |
15-
| test.py:34 | ok | taint_sources | Attribute | file[externally controlled string] |
16-
| test.py:35 | ok | taint_sources | Attribute() | externally controlled string |
9+
| test.py:29 | ok | taint_sources | Attribute() | [externally controlled string] |
10+
| test.py:30 | fail | taint_sources | Attribute() | <NO TAINT> |
11+
| test.py:31 | ok | taint_sources | Attribute() | externally controlled string |
12+
| test.py:32 | ok | taint_sources | Attribute() | externally controlled string |
13+
| test.py:33 | ok | taint_sources | str() | externally controlled string |
14+
| test.py:34 | ok | taint_sources | bytes() | externally controlled string |
15+
| test.py:36 | ok | taint_sources | Attribute | file[externally controlled string] |
16+
| test.py:37 | ok | taint_sources | Attribute() | externally controlled string |
17+
| test.py:47 | ok | taint_sources | form | CgiFieldStorageFormKind |
18+
| test.py:49 | ok | taint_sources | Subscript | CgiFieldStorageFieldKind |
19+
| test.py:49 | ok | taint_sources | Subscript | [CgiFieldStorageFieldKind] |
20+
| test.py:50 | ok | taint_sources | Attribute | externally controlled string |
21+
| test.py:51 | ok | taint_sources | Attribute | file[externally controlled string] |
22+
| test.py:52 | ok | taint_sources | Attribute | externally controlled string |
23+
| test.py:53 | ok | taint_sources | Subscript | CgiFieldStorageFieldKind |
24+
| test.py:54 | ok | taint_sources | Attribute | externally controlled string |
25+
| test.py:55 | ok | taint_sources | Attribute | file[externally controlled string] |
26+
| test.py:56 | ok | taint_sources | Attribute | externally controlled string |
27+
| test.py:58 | ok | taint_sources | Attribute() | [externally controlled string] |
28+
| test.py:58 | ok | taint_sources | Attribute() | externally controlled string |
29+
| test.py:59 | ok | taint_sources | Subscript | externally controlled string |
30+
| test.py:61 | ok | taint_sources | Attribute() | externally controlled string |
31+
| test.py:63 | ok | taint_sources | Attribute() | [externally controlled string] |
32+
| test.py:64 | ok | taint_sources | Subscript | externally controlled string |

python/ql/test/library-tests/web/stdlib/test.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import sys
2+
import os
3+
import cgi
24

35
if sys.version_info[0] == 2:
46
from BaseHTTPServer import BaseHTTPRequestHandler
@@ -35,6 +37,33 @@ def taint_sources(self):
3537
self.rfile.read(),
3638
)
3739

40+
form = cgi.FieldStorage(
41+
self.rfile,
42+
self.headers,
43+
environ={'REQUEST_METHOD': 'POST', 'CONTENT_TYPE': self.headers.get('content-type')},
44+
)
45+
46+
ensure_tainted(
47+
form,
48+
49+
form['key'],
50+
form['key'].value,
51+
form['key'].file,
52+
form['key'].filename,
53+
form['key'][0], # will be a list, if multiple fields named "key" are provided
54+
form['key'][0].value,
55+
form['key'][0].file,
56+
form['key'][0].filename,
57+
58+
form.getvalue('key'),
59+
form.getvalue('key')[0], # will be a list, if multiple fields named "key" are provided
60+
61+
form.getfirst('key'),
62+
63+
form.getlist('key'),
64+
form.getlist('key')[0],
65+
)
66+
3867
def do_GET(self):
3968
# send_response will log a line to stderr
4069
self.send_response(200)
@@ -44,6 +73,27 @@ def do_GET(self):
4473
print(self.headers)
4574

4675

76+
def do_POST(self):
77+
form = cgi.FieldStorage(
78+
self.rfile,
79+
self.headers,
80+
environ={'REQUEST_METHOD': 'POST', 'CONTENT_TYPE': self.headers.get('content-type')},
81+
)
82+
83+
if 'myfile' not in form:
84+
self.send_response(422)
85+
self.end_headers()
86+
return
87+
88+
field = form['myfile']
89+
90+
field.file.seek(0, os.SEEK_END)
91+
filesize = field.file.tell()
92+
93+
print("Uploaded {!r} with {} bytes".format(field.filename, filesize))
94+
95+
self.send_response(200)
96+
self.end_headers()
4797

4898

4999
if __name__ == "__main__":
@@ -52,3 +102,6 @@ def do_GET(self):
52102

53103
# Headers work case-insensitively, so self.headers['foo'] == self.headers['FOO']
54104
# curl localhost:8080 --header "Foo: 1" --header "foo: 2"
105+
106+
# To test file submission through forms, use
107+
# curl -F myfile=@<yourfile> localhost:8080

0 commit comments

Comments
 (0)