Skip to content

Commit 3774310

Browse files
committed
Python: Reduce FPs in Django due to bad XSS taint-sinks
Fixes github/codeql-python-team#38
1 parent fa08676 commit 3774310

File tree

6 files changed

+73
-30
lines changed

6 files changed

+73
-30
lines changed

python/ql/src/semmle/python/web/django/Response.qll

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ private class DjangoResponseKind extends TaintKind {
1212
/** INTENRAL taint-source used for tracking a django response. */
1313
private class DjangoResponseSource extends TaintSource {
1414
DjangoResponseSource() {
15-
exists(DjangoXSSVulnResponse cls |
15+
exists(DjangoXSSVulnerableResponse cls |
1616
cls.getACall() = this
1717
)
1818
}
@@ -40,12 +40,17 @@ class DjangoResponseWrite extends HttpResponseTaintSink {
4040
/** An argument to initialization of a django response, which is vulnerable to external data (xss) */
4141
class DjangoResponseContent extends HttpResponseTaintSink {
4242
DjangoResponseContent() {
43-
exists(CallNode call, DjangoXSSVulnResponse cls |
44-
call = cls.getACall()
45-
|
46-
call.getArg(0) = this
47-
or
48-
call.getArgByName("content") = this
43+
exists(CallNode call, DjangoXSSVulnerableResponse cls |
44+
call = cls.getACall() and
45+
this = cls.getContentArg(call) and
46+
(
47+
not exists(cls.getContentTypeArg(call))
48+
or
49+
exists(StringValue s |
50+
cls.getContentTypeArg(call).pointsTo(s) and
51+
s.getText().indexOf("text/html") = 0
52+
)
53+
)
4954
)
5055
}
5156

python/ql/src/semmle/python/web/django/Shared.qll

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,38 +2,64 @@ import python
22

33
/** A Class that is a Django Response (subclass of `django.http.HttpResponse`). */
44
class DjangoResponse extends ClassValue {
5+
ClassValue base;
6+
57
DjangoResponse() {
6-
exists(ClassValue base |
8+
(
79
// version 1.x
810
base = Value::named("django.http.response.HttpResponse")
911
or
1012
// version 2.x and 3.x
1113
// https://docs.djangoproject.com/en/2.2/ref/request-response/#httpresponse-objects
1214
base = Value::named("django.http.HttpResponse")
13-
|
14-
this.getASuperType() = base
15-
)
15+
) and
16+
this.getASuperType() = base
1617
}
1718
}
1819

1920
/** A Class that is a Django Redirect Response (subclass of `django.http.HttpResponseRedirectBase`). */
2021
class DjangoRedirectResponse extends DjangoResponse {
2122
DjangoRedirectResponse() {
22-
exists(ClassValue base |
23+
exists(ClassValue redirect_base |
2324
// version 1.x
24-
base = Value::named("django.http.response.HttpResponseRedirectBase")
25+
redirect_base = Value::named("django.http.response.HttpResponseRedirectBase")
2526
or
2627
// version 2.x and 3.x
27-
base = Value::named("django.http.HttpResponseRedirectBase")
28+
redirect_base = Value::named("django.http.HttpResponseRedirectBase")
2829
|
29-
this.getASuperType() = base
30+
this.getASuperType() = redirect_base
3031
)
3132
}
3233
}
3334

3435
/** A Class that is a Django Response, and is vulnerable to XSS. */
35-
class DjangoXSSVulnResponse extends DjangoResponse {
36-
DjangoXSSVulnResponse() {
36+
class DjangoXSSVulnerableResponse extends DjangoResponse {
37+
DjangoXSSVulnerableResponse() {
38+
// We want to avoid FPs on subclasses that are not exposed to XSS, for example `JsonResponse`.
39+
// The easiest way is to disregard any subclass that has a special `__init__` method.
40+
// It's not guaranteed to remove all FPs, or not to generate FNs, but compared to our
41+
// previous implementation that would treat 0-th argument to _any_ subclass as a sink,
42+
// this gets us much closer to reality.
43+
this.lookup("__init__") = base.lookup("__init__") and
3744
not this instanceof DjangoRedirectResponse
3845
}
46+
47+
// The reason these two method are defined in this class (and no in the Sink
48+
// definition that uses this class), is that if we were to add support for `HttpResponseNotAllowed`
49+
// it would make much more sense to add the custom logic in this class (or subclass), than to handle all of it
50+
// in the sink definition.
51+
52+
/** Gets the `content` argument of a `call` to the constructor */
53+
ControlFlowNode getContentArg(CallNode call) {
54+
result = call.getArg(0)
55+
or
56+
result = call.getArgByName("content")
57+
}
58+
59+
/** Gets the `content_type` argument of a `call` to the constructor */
60+
ControlFlowNode getContentTypeArg(CallNode call) {
61+
result = call.getArg(1)
62+
or
63+
result = call.getArgByName("content_type")
64+
}
3965
}

python/ql/test/library-tests/web/django/HttpResponseSinks.expected

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,8 @@
88
| views_1x.py:45:25:45:70 | django.Response(...) | externally controlled string |
99
| views_1x.py:66:25:66:55 | django.Response(...) | externally controlled string |
1010
| views_1x.py:75:25:75:33 | django.Response(...) | externally controlled string |
11-
| views_1x.py:85:25:85:55 | django.Response(...) | externally controlled string |
12-
| views_1x.py:90:25:90:33 | django.Response(...) | externally controlled string |
13-
| views_1x.py:94:25:94:58 | django.Response(...) | externally controlled string |
1411
| views_1x.py:103:33:103:55 | django.Response(...) | externally controlled string |
12+
| views_1x.py:107:25:107:47 | django.Response(...) | externally controlled string |
1513
| views_2x_3x.py:8:25:8:63 | django.Response(...) | externally controlled string |
1614
| views_2x_3x.py:12:25:12:52 | django.Response(...) | externally controlled string |
1715
| views_2x_3x.py:16:25:16:53 | django.Response(...) | externally controlled string |
@@ -25,7 +23,5 @@
2523
| views_2x_3x.py:82:25:82:69 | django.Response(...) | externally controlled string |
2624
| views_2x_3x.py:85:25:85:64 | django.Response(...) | externally controlled string |
2725
| views_2x_3x.py:88:25:88:32 | django.Response(...) | externally controlled string |
28-
| views_2x_3x.py:106:25:106:55 | django.Response(...) | externally controlled string |
29-
| views_2x_3x.py:111:25:111:33 | django.Response(...) | externally controlled string |
30-
| views_2x_3x.py:115:25:115:58 | django.Response(...) | externally controlled string |
3126
| views_2x_3x.py:124:33:124:55 | django.Response(...) | externally controlled string |
27+
| views_2x_3x.py:128:25:128:47 | django.Response(...) | externally controlled string |

python/ql/test/library-tests/web/django/views_1x.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ def fp_manual_content_type(reuqest):
9898
def fp_redirect(request):
9999
return HttpResponseRedirect(request.GET.get("next"))
100100

101-
# Ensure that subclasses are still vuln to XSS
101+
# Ensure that simple subclasses are still vuln to XSS
102102
def tp_not_found(request):
103103
return HttpResponseNotFound(request.GET.get("name"))
104+
105+
# Ensure we still have a XSS sink when manually setting the content_type to HTML
106+
def tp_manual_response_type(request):
107+
return HttpResponse(request.GET.get("name"), content_type="text/html; charset=utf-8")

python/ql/test/library-tests/web/django/views_2x_3x.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,22 +103,26 @@ def not_valid_identifier(request):
103103
# FP reported in https://github.com/github/codeql-python-team/issues/38
104104
def fp_json_response(request):
105105
# implicitly sets Content-Type to "application/json"
106-
return JsonResponse({"foo": request.GET.get("foo")}) # TODO
106+
return JsonResponse({"foo": request.GET.get("foo")})
107107

108108
# Not an XSS sink, since the Content-Type is not "text/html"
109109
def fp_manual_json_response(request):
110110
json_data = '{"json": "{}"}'.format(request.GET.get("foo"))
111-
return HttpResponse(json_data, content_type="application/json") # TODO
111+
return HttpResponse(json_data, content_type="application/json")
112112

113113
# Not an XSS sink, since the Content-Type is not "text/html"
114114
def fp_manual_content_type(reuqest):
115-
return HttpResponse('<img src="0" onerror="alert(1)">', content_type="text/plain") # TODO
115+
return HttpResponse('<img src="0" onerror="alert(1)">', content_type="text/plain")
116116

117117
# XSS FP reported in https://github.com/github/codeql/issues/3466
118118
# Note: This should be a open-redirect sink, but not a XSS sink.
119119
def fp_redirect(request):
120120
return HttpResponseRedirect(request.GET.get("next"))
121121

122-
# Ensure that subclasses are still vuln to XSS
122+
# Ensure that simple subclasses are still vuln to XSS
123123
def tp_not_found(request):
124124
return HttpResponseNotFound(request.GET.get("name"))
125+
126+
# Ensure we still have a XSS sink when manually setting the content_type to HTML
127+
def tp_manual_response_type(request):
128+
return HttpResponse(request.GET.get("name"), content_type="text/html; charset=utf-8")

python/ql/test/query-tests/Security/lib/django/http/response.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,21 @@
11
class HttpResponseBase(object):
22
status_code = 200
33

4+
def __init__(self, content_type=None, status=None, reason=None, charset=None):
5+
pass
6+
47

58
class HttpResponse(HttpResponseBase):
6-
pass
9+
def __init__(self, content=b"", *args, **kwargs):
10+
super().__init__(*args, **kwargs)
11+
# Content is a bytestring. See the `content` property methods.
12+
self.content = content
713

814

915
class HttpResponseRedirectBase(HttpResponse):
10-
pass
16+
def __init__(self, redirect_to, *args, **kwargs):
17+
super().__init__(*args, **kwargs)
18+
...
1119

1220

1321
class HttpResponsePermanentRedirect(HttpResponseRedirectBase):

0 commit comments

Comments
 (0)