Skip to content

Commit 24b37ff

Browse files
authored
Merge pull request github#15187 from github/max-schaefer/py-url-redirection
Python: Add support for more URL redirect sanitisers.
2 parents 5c43a0b + a4639c7 commit 24b37ff

File tree

11 files changed

+358
-32
lines changed

11 files changed

+358
-32
lines changed

python/ql/lib/semmle/python/frameworks/Django.qll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2928,5 +2928,10 @@ module PrivateDjango {
29282928
DjangoAllowedUrl() {
29292929
this = DataFlow::BarrierGuard<djangoUrlHasAllowedHostAndScheme/3>::getABarrierNode()
29302930
}
2931+
2932+
override predicate sanitizes(UrlRedirect::FlowState state) {
2933+
// sanitize all flow states
2934+
any()
2935+
}
29312936
}
29322937
}

python/ql/lib/semmle/python/frameworks/Stdlib/Urllib.qll

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
private import python
1111
private import semmle.python.Concepts
1212
private import semmle.python.ApiGraphs
13+
private import semmle.python.security.dataflow.UrlRedirectCustomizations
1314

1415
/**
1516
* Provides models for the `urllib` module, part of
@@ -70,4 +71,55 @@ private module Urllib {
7071
}
7172
}
7273
}
74+
75+
/**
76+
* Provides models for the `urllib.parse` extension library.
77+
*/
78+
module Parse {
79+
/**
80+
* A call to `urllib.parse.urlparse`.
81+
*/
82+
private DataFlow::CallCfgNode getUrlParseCall() {
83+
result = API::moduleImport("urllib").getMember("parse").getMember("urlparse").getACall()
84+
}
85+
86+
/**
87+
* A read of the `netloc` attribute of a parsed URL as returned by `urllib.parse.urlparse`,
88+
* which is being checked in a way that is relevant for URL redirection vulnerabilities.
89+
*/
90+
private predicate netlocCheck(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
91+
exists(DataFlow::CallCfgNode urlParseCall, DataFlow::AttrRead netlocRead |
92+
urlParseCall = getUrlParseCall() and
93+
netlocRead = urlParseCall.getAnAttributeRead("netloc") and
94+
node = urlParseCall.getArg(0).asCfgNode()
95+
|
96+
// either a simple check of the netloc attribute
97+
g = netlocRead.asCfgNode() and
98+
branch = false
99+
or
100+
// or a comparison (we don't care against what)
101+
exists(Compare cmp, string op |
102+
cmp = g.getNode() and
103+
op = unique(Cmpop opp | opp = cmp.getAnOp()).getSymbol() and
104+
cmp.getASubExpression() = netlocRead.asExpr()
105+
|
106+
op in ["==", "is", "in"] and branch = true
107+
or
108+
op in ["!=", "is not", "not in"] and branch = false
109+
)
110+
)
111+
}
112+
113+
/**
114+
* A check of `urllib.parse.urlparse().netloc`, considered as a sanitizer-guard for URL redirection.
115+
*/
116+
private class NetlocCheck extends UrlRedirect::Sanitizer {
117+
NetlocCheck() { this = DataFlow::BarrierGuard<netlocCheck/3>::getABarrierNode() }
118+
119+
override predicate sanitizes(UrlRedirect::FlowState state) {
120+
// `urlparse` does not handle backslashes
121+
state instanceof UrlRedirect::NoBackslashes
122+
}
123+
}
124+
}
73125
}

python/ql/lib/semmle/python/frameworks/Yarl.qll

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ private import semmle.python.Concepts
1010
private import semmle.python.ApiGraphs
1111
private import semmle.python.frameworks.Multidict
1212
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
13+
private import semmle.python.security.dataflow.UrlRedirectCustomizations
1314

1415
/**
1516
* INTERNAL: Do not use.
@@ -108,5 +109,32 @@ module Yarl {
108109
this.(DataFlow::AttrRead).getAttributeName() = "query"
109110
}
110111
}
112+
113+
private predicate yarlUrlIsAbsoluteCall(
114+
DataFlow::GuardNode g, ControlFlowNode node, boolean branch
115+
) {
116+
exists(ClassInstantiation instance, DataFlow::MethodCallNode call |
117+
call.calls(instance, "is_absolute") and
118+
g = call.asCfgNode() and
119+
node = instance.getArg(0).asCfgNode() and
120+
branch = false
121+
)
122+
}
123+
124+
/**
125+
* A call to `yarl.URL.is_absolute`, considered as a sanitizer-guard for URL redirection.
126+
*
127+
* See https://yarl.aio-libs.org/en/latest/api/#absolute-and-relative-urls.
128+
*/
129+
private class YarlIsAbsoluteUrl extends UrlRedirect::Sanitizer {
130+
YarlIsAbsoluteUrl() {
131+
this = DataFlow::BarrierGuard<yarlUrlIsAbsoluteCall/3>::getABarrierNode()
132+
}
133+
134+
override predicate sanitizes(UrlRedirect::FlowState state) {
135+
// `is_absolute` does not handle backslashes
136+
state instanceof UrlRedirect::NoBackslashes
137+
}
138+
}
111139
}
112140
}

python/ql/lib/semmle/python/security/dataflow/UrlRedirectCustomizations.qll

Lines changed: 82 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,29 @@ private import semmle.python.dataflow.new.BarrierGuards
1616
* vulnerabilities, as well as extension points for adding your own.
1717
*/
1818
module UrlRedirect {
19+
/**
20+
* A state value to track whether the untrusted data may contain backslashes.
21+
*/
22+
abstract class FlowState extends string {
23+
bindingset[this]
24+
FlowState() { any() }
25+
}
26+
27+
/**
28+
* A state value signifying that the untrusted data may contain backslashes.
29+
*/
30+
class MayContainBackslashes extends UrlRedirect::FlowState {
31+
MayContainBackslashes() { this = "MayContainBackslashes" }
32+
}
33+
34+
/**
35+
* A state value signifying that any backslashes in the untrusted data have
36+
* been eliminated, but no other sanitization has happened.
37+
*/
38+
class NoBackslashes extends UrlRedirect::FlowState {
39+
NoBackslashes() { this = "NoBackslashes" }
40+
}
41+
1942
/**
2043
* A data flow source for "URL redirection" vulnerabilities.
2144
*/
@@ -29,7 +52,28 @@ module UrlRedirect {
2952
/**
3053
* A sanitizer for "URL redirection" vulnerabilities.
3154
*/
32-
abstract class Sanitizer extends DataFlow::Node { }
55+
abstract class Sanitizer extends DataFlow::Node {
56+
/**
57+
* Holds if this sanitizer sanitizes flow in the given state.
58+
*/
59+
abstract predicate sanitizes(FlowState state);
60+
}
61+
62+
/**
63+
* An additional flow step for "URL redirection" vulnerabilities.
64+
*/
65+
abstract class AdditionalFlowStep extends DataFlow::Node {
66+
/**
67+
* Holds if there should be an additional flow step from `nodeFrom` in `stateFrom`
68+
* to `nodeTo` in `stateTo`.
69+
*
70+
* For example, a call to `replace` that replaces backslashes with forward slashes
71+
* takes flow from `MayContainBackslashes` to `NoBackslashes`.
72+
*/
73+
abstract predicate step(
74+
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
75+
);
76+
}
3377

3478
/**
3579
* A source of remote user input, considered as a flow source.
@@ -57,10 +101,46 @@ module UrlRedirect {
57101
string_concat.getRight() = this.asCfgNode()
58102
)
59103
}
104+
105+
override predicate sanitizes(FlowState state) {
106+
// sanitize all flow states
107+
any()
108+
}
109+
}
110+
111+
/**
112+
* A call that replaces backslashes with forward slashes or eliminates them
113+
* altogether, considered as a partial sanitizer, as well as an additional
114+
* flow step.
115+
*/
116+
class ReplaceBackslashesSanitizer extends Sanitizer, AdditionalFlowStep, DataFlow::MethodCallNode {
117+
DataFlow::Node receiver;
118+
119+
ReplaceBackslashesSanitizer() {
120+
this.calls(receiver, "replace") and
121+
this.getArg(0).asExpr().(StrConst).getText() = "\\" and
122+
this.getArg(1).asExpr().(StrConst).getText() in ["/", ""]
123+
}
124+
125+
override predicate sanitizes(FlowState state) { state instanceof MayContainBackslashes }
126+
127+
override predicate step(
128+
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
129+
) {
130+
nodeFrom = receiver and
131+
stateFrom instanceof MayContainBackslashes and
132+
nodeTo = this and
133+
stateTo instanceof NoBackslashes
134+
}
60135
}
61136

62137
/**
63138
* A comparison with a constant string, considered as a sanitizer-guard.
64139
*/
65-
class StringConstCompareAsSanitizerGuard extends Sanitizer, StringConstCompareBarrier { }
140+
class StringConstCompareAsSanitizerGuard extends Sanitizer, StringConstCompareBarrier {
141+
override predicate sanitizes(FlowState state) {
142+
// sanitize all flow states
143+
any()
144+
}
145+
}
66146
}

python/ql/lib/semmle/python/security/dataflow/UrlRedirectQuery.qll

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
private import python
1010
import semmle.python.dataflow.new.DataFlow
1111
import semmle.python.dataflow.new.TaintTracking
12-
import UrlRedirectCustomizations::UrlRedirect
12+
import UrlRedirectCustomizations::UrlRedirect as UrlRedirect
1313

1414
/**
1515
* DEPRECATED: Use `UrlRedirectFlow` module instead.
@@ -19,20 +19,48 @@ import UrlRedirectCustomizations::UrlRedirect
1919
deprecated class Configuration extends TaintTracking::Configuration {
2020
Configuration() { this = "UrlRedirect" }
2121

22-
override predicate isSource(DataFlow::Node source) { source instanceof Source }
22+
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
23+
source instanceof UrlRedirect::Source and state instanceof UrlRedirect::MayContainBackslashes
24+
}
2325

24-
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
26+
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
27+
sink instanceof UrlRedirect::Sink and state instanceof UrlRedirect::FlowState
28+
}
2529

26-
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
30+
override predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) {
31+
node.(UrlRedirect::Sanitizer).sanitizes(state)
32+
}
33+
34+
override predicate isAdditionalTaintStep(
35+
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
36+
DataFlow::FlowState stateTo
37+
) {
38+
any(UrlRedirect::AdditionalFlowStep a).step(nodeFrom, stateFrom, nodeTo, stateTo)
39+
}
2740
}
2841

29-
private module UrlRedirectConfig implements DataFlow::ConfigSig {
30-
predicate isSource(DataFlow::Node source) { source instanceof Source }
42+
private module UrlRedirectConfig implements DataFlow::StateConfigSig {
43+
class FlowState = UrlRedirect::FlowState;
44+
45+
predicate isSource(DataFlow::Node source, FlowState state) {
46+
source instanceof UrlRedirect::Source and state instanceof UrlRedirect::MayContainBackslashes
47+
}
48+
49+
predicate isSink(DataFlow::Node sink, FlowState state) {
50+
sink instanceof UrlRedirect::Sink and
51+
exists(state)
52+
}
3153

32-
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
54+
predicate isBarrier(DataFlow::Node node, FlowState state) {
55+
node.(UrlRedirect::Sanitizer).sanitizes(state)
56+
}
3357

34-
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
58+
predicate isAdditionalFlowStep(
59+
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
60+
) {
61+
any(UrlRedirect::AdditionalFlowStep a).step(nodeFrom, stateFrom, nodeTo, stateTo)
62+
}
3563
}
3664

3765
/** Global taint-tracking for detecting "URL redirection" vulnerabilities. */
38-
module UrlRedirectFlow = TaintTracking::Global<UrlRedirectConfig>;
66+
module UrlRedirectFlow = TaintTracking::GlobalWithState<UrlRedirectConfig>;

python/ql/src/Security/CWE-601/UrlRedirect.qhelp

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,26 +39,39 @@ and check that the user input is in that list:
3939

4040
<p>
4141
Often this is not possible, so an alternative is to check that the target URL does not
42-
specify an explicit host name. For example, the Django framework provides a
43-
function <code>url_has_allowed_host_and_scheme</code> that can be used to check that a
44-
URL is safe to redirect to, as shown in the following example:
42+
specify an explicit host name. For example, you can use the <code>urlparse</code> function
43+
from the Python standard library to parse the URL and check that the <code>netloc</code>
44+
attribute is empty.
45+
</p>
46+
47+
<p>
48+
Note, however, that many browsers accept backslash characters (<code>\</code>) as equivalent
49+
to forward slash characters (<code>/</code>) in URLs, but the <code>urlparse</code> function
50+
does not. To account for this, you can first replace all backslashes with forward slashes,
51+
as shown in the following example:
4552
</p>
4653

4754
<sample src="examples/redirect_good2.py"/>
4855

4956
<p>
50-
Note that many browsers accept backslash characters (<code>\</code>) as equivalent to
51-
forward slash characters (<code>/</code>) in URLs, so it is important to account for this
52-
when validating the URL, for example by first replacing all backslashes with forward
53-
slashes. Django's <code>url_has_allowed_host_and_scheme</code> function
54-
does this automatically, but other libraries may not.
57+
For Django application, you can use the function <code>url_has_allowed_host_and_scheme</code>
58+
to check that a URL is safe to redirect to, as shown in the following example:
59+
</p>
60+
61+
<sample src="examples/redirect_good3.py"/>
62+
63+
<p>
64+
Note that <code>url_has_allowed_host_and_scheme</code> handles backslashes correctly, so no
65+
additional processing is required.
5566
</p>
5667

5768
</example>
5869

5970
<references>
6071
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Unvalidated_Redirects_and_Forwards_Cheat_Sheet.html">
6172
XSS Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
73+
<li>Python standard library: <a href="https://docs.python.org/3/library/urllib.parse.html">
74+
urllib.parse</a>.</li>
6275
</references>
6376

6477
</qhelp>
Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
1-
from django.http import HttpResponseRedirect
2-
from django.shortcuts import redirect
3-
from django.utils.http import url_has_allowed_host_and_scheme
4-
from django.views import View
1+
from flask import Flask, request, redirect
2+
from urllib.parse import urlparse
53

6-
class RedirectView(View):
7-
def get(self, request, *args, **kwargs):
8-
target = request.GET.get('target', '')
9-
if url_has_allowed_host_and_scheme(target, allowed_hosts=None):
10-
return HttpResponseRedirect(target)
11-
else:
12-
# ignore the target and redirect to the home page
13-
return redirect('/')
4+
app = Flask(__name__)
5+
6+
@app.route('/')
7+
def hello():
8+
target = request.args.get('target', '')
9+
target = target.replace('\\', '')
10+
if not urlparse(target).netloc:
11+
# relative path, safe to redirect
12+
return redirect(target, code=302)
13+
# ignore the target and redirect to the home page
14+
return redirect('/', code=302)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from django.http import HttpResponseRedirect
2+
from django.shortcuts import redirect
3+
from django.utils.http import url_has_allowed_host_and_scheme
4+
from django.views import View
5+
6+
class RedirectView(View):
7+
def get(self, request, *args, **kwargs):
8+
target = request.GET.get('target', '')
9+
if url_has_allowed_host_and_scheme(target, allowed_hosts=None):
10+
return HttpResponseRedirect(target)
11+
else:
12+
# ignore the target and redirect to the home page
13+
return redirect('/')
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
5+
- Added modeling of YARL's `is_absolute` method and checks of the `netloc` of a parsed URL as sanitizers for the `py/url-redirection` query, leading to fewer false positives.

0 commit comments

Comments
 (0)