Skip to content

Commit 7dc6518

Browse files
committed
Python: Add FileLikeObject modeling
Such that the result of `request.FILES["key"].file.read()` is tainted
1 parent 18c0d13 commit 7dc6518

File tree

3 files changed

+73
-5
lines changed

3 files changed

+73
-5
lines changed

python/ql/src/semmle/python/frameworks/Django.qll

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ private import semmle.python.dataflow.new.TaintTracking
1010
private import semmle.python.Concepts
1111
private import semmle.python.ApiGraphs
1212
private import semmle.python.frameworks.PEP249
13+
private import semmle.python.frameworks.Stdlib
1314
private import semmle.python.regex
1415
private import semmle.python.frameworks.internal.PoorMansFunctionResolution
1516
private import semmle.python.frameworks.internal.SelfRefMixin
@@ -418,9 +419,13 @@ private module Django {
418419
nodeTo.(DataFlow::AttrRead).getAttributeName() in [
419420
"content_type", "content_type_extra", "content_type_extra", "charset", "name", "file"
420421
]
421-
// TODO: Model `file` with shared-filelike such that `request.FILES["key"].file.read()` works
422422
}
423423
}
424+
425+
/** A file-like object instance that originates from a `UploadedFile`. */
426+
class UploadedFileFileLikeInstances extends Stdlib::FileLikeObject::InstanceSource {
427+
UploadedFileFileLikeInstances() { this.(DataFlow::AttrRead).accesses(instance(), "file") }
428+
}
424429
}
425430
}
426431

python/ql/src/semmle/python/frameworks/Stdlib.qll

Lines changed: 66 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,69 @@ private import semmle.python.ApiGraphs
1212
private import semmle.python.frameworks.PEP249
1313

1414
/** Provides models for the Python standard library. */
15-
private module Stdlib {
15+
module Stdlib {
16+
/**
17+
* Provides models for file-like objects,
18+
* mostly to define standard set of extra taint-steps.
19+
*
20+
* See
21+
* - https://docs.python.org/3.9/glossary.html#term-file-like-object
22+
* - https://docs.python.org/3.9/library/io.html#io.IOBase
23+
*/
24+
module FileLikeObject {
25+
/**
26+
* A source of a file-like object, extend this class to model new instances.
27+
*
28+
* This can include instantiations of the class, return values from function
29+
* calls, or a special parameter that will be set when functions are called by an external
30+
* library.
31+
*
32+
* Use the predicate `like::instance()` to get references to instances of `file.like`.
33+
*/
34+
abstract class InstanceSource extends DataFlow::LocalSourceNode { }
35+
36+
/** Gets a reference to a file-like object. */
37+
private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) {
38+
t.start() and
39+
result instanceof InstanceSource
40+
or
41+
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
42+
}
43+
44+
/** Gets a reference to a file-like object. */
45+
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
46+
47+
/**
48+
* Taint propagation for file-like objects.
49+
*/
50+
class FileLikeObjectAdditionalTaintStep extends TaintTracking::AdditionalTaintStep {
51+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
52+
// result of method call is tainted
53+
nodeFrom = instance() and
54+
nodeTo.(DataFlow::MethodCallNode).calls(nodeFrom, ["read", "readline", "readlines"])
55+
or
56+
// taint-propagation back to instance from `foo.write(tainted_data)`
57+
exists(DataFlow::AttrRead write, DataFlow::CallCfgNode call, DataFlow::Node instance_ |
58+
instance_ = instance() and
59+
write.accesses(instance_, "write")
60+
|
61+
nodeTo.(DataFlow::PostUpdateNode).getPreUpdateNode() = instance_ and
62+
call.getFunction() = write and
63+
nodeFrom = call.getArg(0)
64+
)
65+
}
66+
}
67+
}
68+
}
69+
70+
/**
71+
* Provides models for the Python standard library.
72+
*
73+
* This module is marked private as exposing it means committing to 1-year deprecation
74+
* policy, and the code is not in a polished enough state that we want to do so -- at
75+
* least not without having convincing use-cases for it :)
76+
*/
77+
private module StdlibPrivate {
1678
// ---------------------------------------------------------------------------
1779
// os
1880
// ---------------------------------------------------------------------------
@@ -395,7 +457,8 @@ private module Stdlib {
395457
* A call to the builtin `open` function.
396458
* See https://docs.python.org/3/library/functions.html#open
397459
*/
398-
private class OpenCall extends FileSystemAccess::Range, DataFlow::CallCfgNode {
460+
private class OpenCall extends FileSystemAccess::Range, Stdlib::FileLikeObject::InstanceSource,
461+
DataFlow::CallCfgNode {
399462
OpenCall() { this = getOpenFunctionRef().getACall() }
400463

401464
override DataFlow::Node getAPathArgument() {
@@ -1081,7 +1144,7 @@ private module Stdlib {
10811144
}
10821145

10831146
/** A call to the `open` method on a `pathlib.Path` instance. */
1084-
private class PathLibOpenCall extends PathlibFileAccess {
1147+
private class PathLibOpenCall extends PathlibFileAccess, Stdlib::FileLikeObject::InstanceSource {
10851148
PathLibOpenCall() { attrbuteName = "open" }
10861149
}
10871150

python/ql/test/library-tests/frameworks/django-v2-v3/taint_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def test_taint(request: HttpRequest, foo, bar, baz=None): # $requestHandler rou
6767
request.FILES["key"].charset, # $ tainted
6868
request.FILES["key"].name, # $ tainted
6969
request.FILES["key"].file, # $ tainted
70-
request.FILES["key"].file.read(), # $ MISSING: tainted
70+
request.FILES["key"].file.read(), # $ tainted
7171

7272
request.FILES.get("key"), # $ tainted
7373
request.FILES.get("key").name, # $ tainted

0 commit comments

Comments
 (0)