Skip to content

Commit 9817845

Browse files
author
Max Schaefer
committed
Python: Add support for more URL redirect sanitisers.
Since some sanitisers don't handle backslashes correctly, I updated the data-flow configuration to incorporate a flow state tracking whether or not backslashes have been eliminated or converted to forward slashes.
1 parent 6533269 commit 9817845

File tree

7 files changed

+297
-11
lines changed

7 files changed

+297
-11
lines changed

python/ql/lib/semmle/python/frameworks/Stdlib/Urllib.qll

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
private import python
1111
private import semmle.python.Concepts
1212
private import semmle.python.ApiGraphs
13+
private import semmle.python.security.dataflow.UrlRedirectCustomizations
1314

1415
/**
1516
* Provides models for the `urllib` module, part of
@@ -70,4 +71,55 @@ private module Urllib {
7071
}
7172
}
7273
}
74+
75+
/**
76+
* Provides models for the `urllib.parse` extension library.
77+
*/
78+
module Parse {
79+
/**
80+
* A call to `urllib.parse.urlparse`.
81+
*/
82+
private DataFlow::CallCfgNode getUrlParseCall() {
83+
result = API::moduleImport("urllib").getMember("parse").getMember("urlparse").getACall()
84+
}
85+
86+
/**
87+
* A read of the `netloc` attribute of a parsed URL as returned by `urllib.parse.urlparse`,
88+
* which is being checked in a way that is relevant for URL redirection vulnerabilities.
89+
*/
90+
private predicate netlocCheck(DataFlow::GuardNode g, ControlFlowNode node, boolean branch) {
91+
exists(DataFlow::CallCfgNode urlParseCall, DataFlow::AttrRead netlocRead |
92+
urlParseCall = getUrlParseCall() and
93+
netlocRead = urlParseCall.getAnAttributeRead("netloc") and
94+
node = urlParseCall.getArg(0).asCfgNode()
95+
|
96+
// either a simple check of the netloc attribute
97+
g = netlocRead.asCfgNode() and
98+
branch = false
99+
or
100+
// or a comparison (we don't care against what)
101+
exists(Compare cmp, string op |
102+
cmp = g.getNode() and
103+
op = unique(Cmpop opp | opp = cmp.getAnOp()).getSymbol() and
104+
cmp.getASubExpression() = netlocRead.asExpr()
105+
|
106+
op in ["==", "is", "in"] and branch = true
107+
or
108+
op in ["!=", "is not", "not in"] and branch = false
109+
)
110+
)
111+
}
112+
113+
/**
114+
* A check of `urllib.parse.urlparse().netloc`, considered as a sanitizer-guard for URL redirection.
115+
*/
116+
private class NetlocCheck extends UrlRedirect::Sanitizer {
117+
NetlocCheck() { this = DataFlow::BarrierGuard<netlocCheck/3>::getABarrierNode() }
118+
119+
override predicate sanitizes(UrlRedirect::FlowState state) {
120+
// `urlparse` does not handle backslashes
121+
state instanceof UrlRedirect::NoBackslashes
122+
}
123+
}
124+
}
73125
}

python/ql/lib/semmle/python/frameworks/Yarl.qll

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ private import semmle.python.Concepts
1010
private import semmle.python.ApiGraphs
1111
private import semmle.python.frameworks.Multidict
1212
private import semmle.python.frameworks.internal.InstanceTaintStepsHelper
13+
private import semmle.python.security.dataflow.UrlRedirectCustomizations
1314

1415
/**
1516
* INTERNAL: Do not use.
@@ -108,5 +109,32 @@ module Yarl {
108109
this.(DataFlow::AttrRead).getAttributeName() = "query"
109110
}
110111
}
112+
113+
private predicate yarlUrlIsAbsoluteCall(
114+
DataFlow::GuardNode g, ControlFlowNode node, boolean branch
115+
) {
116+
exists(ClassInstantiation instance, DataFlow::MethodCallNode call |
117+
call.calls(instance, "is_absolute") and
118+
g = call.asCfgNode() and
119+
node = instance.getArg(0).asCfgNode() and
120+
branch = false
121+
)
122+
}
123+
124+
/**
125+
* A call to `yarl.URL.is_absolute`, considered as a sanitizer-guard for URL redirection.
126+
*
127+
* See https://yarl.aio-libs.org/en/latest/api/#absolute-and-relative-urls.
128+
*/
129+
private class YarlIsAbsoluteUrl extends UrlRedirect::Sanitizer {
130+
YarlIsAbsoluteUrl() {
131+
this = DataFlow::BarrierGuard<yarlUrlIsAbsoluteCall/3>::getABarrierNode()
132+
}
133+
134+
override predicate sanitizes(UrlRedirect::FlowState state) {
135+
// `is_absolute` does not handle backslashes
136+
state instanceof UrlRedirect::NoBackslashes
137+
}
138+
}
111139
}
112140
}

python/ql/lib/semmle/python/security/dataflow/UrlRedirectCustomizations.qll

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,29 @@ private import semmle.python.dataflow.new.BarrierGuards
1616
* vulnerabilities, as well as extension points for adding your own.
1717
*/
1818
module UrlRedirect {
19+
/**
20+
* A state value to track whether the untrusted data may contain backslashes.
21+
*/
22+
abstract class FlowState extends string {
23+
bindingset[this]
24+
FlowState() { any() }
25+
}
26+
27+
/**
28+
* A state value signifying that the untrusted data may contain backslashes.
29+
*/
30+
class MayContainBackslashes extends UrlRedirect::FlowState {
31+
MayContainBackslashes() { this = "MayContainBackslashes" }
32+
}
33+
34+
/**
35+
* A state value signifying that any backslashes in the untrusted data have
36+
* been eliminated, but no other sanitization has happened.
37+
*/
38+
class NoBackslashes extends UrlRedirect::FlowState {
39+
NoBackslashes() { this = "NoBackslashes" }
40+
}
41+
1942
/**
2043
* A data flow source for "URL redirection" vulnerabilities.
2144
*/
@@ -29,7 +52,32 @@ module UrlRedirect {
2952
/**
3053
* A sanitizer for "URL redirection" vulnerabilities.
3154
*/
32-
abstract class Sanitizer extends DataFlow::Node { }
55+
abstract class Sanitizer extends DataFlow::Node {
56+
/**
57+
* Holds if this sanitizer sanitizes flow in the given state.
58+
*
59+
* By default, sanitizers sanitize all flow, but some sanitiziers, for example,
60+
* do not handle untrusted input that contains backslashes, so they only sanitize
61+
* flow in the `NoBackslashes` state.
62+
*/
63+
predicate sanitizes(FlowState state) { any() }
64+
}
65+
66+
/**
67+
* An additional flow step for "URL redirection" vulnerabilities.
68+
*/
69+
abstract class AdditionalFlowStep extends DataFlow::Node {
70+
/**
71+
* Holds if there should be an additional flow step from `nodeFrom` in `stateFrom`
72+
* to `nodeTo` in `stateTo`.
73+
*
74+
* For example, a call to `replace` that replaces backslashes with forward slashes
75+
* takes flow from `MayContainBackslashes` to `NoBackslashes`.
76+
*/
77+
abstract predicate step(
78+
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
79+
);
80+
}
3381

3482
/**
3583
* A source of remote user input, considered as a flow source.
@@ -59,6 +107,30 @@ module UrlRedirect {
59107
}
60108
}
61109

110+
/**
111+
* A call to replace backslashes with forward slashes or eliminates them
112+
* altogether, considered as a partial sanitizer, as well as an additional
113+
* flow step.
114+
*/
115+
class ReplaceBackslashesSanitizer extends Sanitizer, AdditionalFlowStep, DataFlow::MethodCallNode {
116+
ReplaceBackslashesSanitizer() {
117+
this.getFunction().(DataFlow::AttrRead).getAttributeName() = "replace" and
118+
this.getArg(0).asExpr().(StrConst).getText() = "\\" and
119+
this.getArg(1).asExpr().(StrConst).getText() in ["/", ""]
120+
}
121+
122+
override predicate sanitizes(FlowState state) { state instanceof MayContainBackslashes }
123+
124+
override predicate step(
125+
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
126+
) {
127+
nodeFrom = this.getObject() and
128+
stateFrom instanceof MayContainBackslashes and
129+
nodeTo = this and
130+
stateTo instanceof NoBackslashes
131+
}
132+
}
133+
62134
/**
63135
* A comparison with a constant string, considered as a sanitizer-guard.
64136
*/

python/ql/lib/semmle/python/security/dataflow/UrlRedirectQuery.qll

Lines changed: 37 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
private import python
1010
import semmle.python.dataflow.new.DataFlow
1111
import semmle.python.dataflow.new.TaintTracking
12-
import UrlRedirectCustomizations::UrlRedirect
12+
import UrlRedirectCustomizations::UrlRedirect as UrlRedirect
1313

1414
/**
1515
* DEPRECATED: Use `UrlRedirectFlow` module instead.
@@ -19,20 +19,48 @@ import UrlRedirectCustomizations::UrlRedirect
1919
deprecated class Configuration extends TaintTracking::Configuration {
2020
Configuration() { this = "UrlRedirect" }
2121

22-
override predicate isSource(DataFlow::Node source) { source instanceof Source }
22+
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
23+
source instanceof UrlRedirect::Source and state instanceof UrlRedirect::MayContainBackslashes
24+
}
2325

24-
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
26+
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
27+
sink instanceof UrlRedirect::Sink and state instanceof UrlRedirect::FlowState
28+
}
2529

26-
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
30+
override predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) {
31+
node.(UrlRedirect::Sanitizer).sanitizes(state)
32+
}
33+
34+
override predicate isAdditionalTaintStep(
35+
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
36+
DataFlow::FlowState stateTo
37+
) {
38+
any(UrlRedirect::AdditionalFlowStep a).step(nodeFrom, stateFrom, nodeTo, stateTo)
39+
}
2740
}
2841

29-
private module UrlRedirectConfig implements DataFlow::ConfigSig {
30-
predicate isSource(DataFlow::Node source) { source instanceof Source }
42+
private module UrlRedirectConfig implements DataFlow::StateConfigSig {
43+
class FlowState = UrlRedirect::FlowState;
44+
45+
predicate isSource(DataFlow::Node source, FlowState state) {
46+
source instanceof UrlRedirect::Source and state instanceof UrlRedirect::MayContainBackslashes
47+
}
48+
49+
predicate isSink(DataFlow::Node sink, FlowState state) {
50+
sink instanceof UrlRedirect::Sink and
51+
state = state
52+
}
3153

32-
predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
54+
predicate isBarrier(DataFlow::Node node, FlowState state) {
55+
node.(UrlRedirect::Sanitizer).sanitizes(state)
56+
}
3357

34-
predicate isBarrier(DataFlow::Node node) { node instanceof Sanitizer }
58+
predicate isAdditionalFlowStep(
59+
DataFlow::Node nodeFrom, FlowState stateFrom, DataFlow::Node nodeTo, FlowState stateTo
60+
) {
61+
any(UrlRedirect::AdditionalFlowStep a).step(nodeFrom, stateFrom, nodeTo, stateTo)
62+
}
3563
}
3664

3765
/** Global taint-tracking for detecting "URL redirection" vulnerabilities. */
38-
module UrlRedirectFlow = TaintTracking::Global<UrlRedirectConfig>;
66+
module UrlRedirectFlow = TaintTracking::GlobalWithState<UrlRedirectConfig>;
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
5+
- Added modeling of YARL's `is_absolute` method and checks of the `netloc` of a parsed URL as sanitizers for the `py/url-redirection` query, leading to fewer false positives.

python/ql/test/query-tests/Security/CWE-601-UrlRedirect/UrlRedirect.expected

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ edges
99
| test.py:1:26:1:32 | ControlFlowNode for request | test.py:74:17:74:23 | ControlFlowNode for request |
1010
| test.py:1:26:1:32 | ControlFlowNode for request | test.py:81:17:81:23 | ControlFlowNode for request |
1111
| test.py:1:26:1:32 | ControlFlowNode for request | test.py:90:17:90:23 | ControlFlowNode for request |
12+
| test.py:1:26:1:32 | ControlFlowNode for request | test.py:111:17:111:23 | ControlFlowNode for request |
13+
| test.py:1:26:1:32 | ControlFlowNode for request | test.py:137:17:137:23 | ControlFlowNode for request |
14+
| test.py:1:26:1:32 | ControlFlowNode for request | test.py:145:17:145:23 | ControlFlowNode for request |
1215
| test.py:7:5:7:10 | ControlFlowNode for target | test.py:8:21:8:26 | ControlFlowNode for target |
1316
| test.py:7:14:7:20 | ControlFlowNode for request | test.py:7:14:7:25 | ControlFlowNode for Attribute |
1417
| test.py:7:14:7:25 | ControlFlowNode for Attribute | test.py:7:14:7:43 | ControlFlowNode for Attribute() |
@@ -52,6 +55,18 @@ edges
5255
| test.py:90:17:90:23 | ControlFlowNode for request | test.py:90:17:90:28 | ControlFlowNode for Attribute |
5356
| test.py:90:17:90:28 | ControlFlowNode for Attribute | test.py:90:17:90:46 | ControlFlowNode for Attribute() |
5457
| test.py:90:17:90:46 | ControlFlowNode for Attribute() | test.py:90:5:90:13 | ControlFlowNode for untrusted |
58+
| test.py:111:5:111:13 | ControlFlowNode for untrusted | test.py:114:25:114:33 | ControlFlowNode for untrusted |
59+
| test.py:111:17:111:23 | ControlFlowNode for request | test.py:111:17:111:28 | ControlFlowNode for Attribute |
60+
| test.py:111:17:111:28 | ControlFlowNode for Attribute | test.py:111:17:111:46 | ControlFlowNode for Attribute() |
61+
| test.py:111:17:111:46 | ControlFlowNode for Attribute() | test.py:111:5:111:13 | ControlFlowNode for untrusted |
62+
| test.py:137:5:137:13 | ControlFlowNode for untrusted | test.py:140:25:140:33 | ControlFlowNode for untrusted |
63+
| test.py:137:17:137:23 | ControlFlowNode for request | test.py:137:17:137:28 | ControlFlowNode for Attribute |
64+
| test.py:137:17:137:28 | ControlFlowNode for Attribute | test.py:137:17:137:46 | ControlFlowNode for Attribute() |
65+
| test.py:137:17:137:46 | ControlFlowNode for Attribute() | test.py:137:5:137:13 | ControlFlowNode for untrusted |
66+
| test.py:145:5:145:13 | ControlFlowNode for untrusted | test.py:148:25:148:33 | ControlFlowNode for untrusted |
67+
| test.py:145:17:145:23 | ControlFlowNode for request | test.py:145:17:145:28 | ControlFlowNode for Attribute |
68+
| test.py:145:17:145:28 | ControlFlowNode for Attribute | test.py:145:17:145:46 | ControlFlowNode for Attribute() |
69+
| test.py:145:17:145:46 | ControlFlowNode for Attribute() | test.py:145:5:145:13 | ControlFlowNode for untrusted |
5570
nodes
5671
| test.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
5772
| test.py:1:26:1:32 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
@@ -107,6 +122,21 @@ nodes
107122
| test.py:90:17:90:28 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
108123
| test.py:90:17:90:46 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
109124
| test.py:93:18:93:26 | ControlFlowNode for untrusted | semmle.label | ControlFlowNode for untrusted |
125+
| test.py:111:5:111:13 | ControlFlowNode for untrusted | semmle.label | ControlFlowNode for untrusted |
126+
| test.py:111:17:111:23 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
127+
| test.py:111:17:111:28 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
128+
| test.py:111:17:111:46 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
129+
| test.py:114:25:114:33 | ControlFlowNode for untrusted | semmle.label | ControlFlowNode for untrusted |
130+
| test.py:137:5:137:13 | ControlFlowNode for untrusted | semmle.label | ControlFlowNode for untrusted |
131+
| test.py:137:17:137:23 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
132+
| test.py:137:17:137:28 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
133+
| test.py:137:17:137:46 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
134+
| test.py:140:25:140:33 | ControlFlowNode for untrusted | semmle.label | ControlFlowNode for untrusted |
135+
| test.py:145:5:145:13 | ControlFlowNode for untrusted | semmle.label | ControlFlowNode for untrusted |
136+
| test.py:145:17:145:23 | ControlFlowNode for request | semmle.label | ControlFlowNode for request |
137+
| test.py:145:17:145:28 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
138+
| test.py:145:17:145:46 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
139+
| test.py:148:25:148:33 | ControlFlowNode for untrusted | semmle.label | ControlFlowNode for untrusted |
110140
subpaths
111141
#select
112142
| test.py:8:21:8:26 | ControlFlowNode for target | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:8:21:8:26 | ControlFlowNode for target | Untrusted URL redirection depends on a $@. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
@@ -118,3 +148,6 @@ subpaths
118148
| test.py:76:21:76:26 | ControlFlowNode for unsafe | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:76:21:76:26 | ControlFlowNode for unsafe | Untrusted URL redirection depends on a $@. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
119149
| test.py:83:21:83:26 | ControlFlowNode for unsafe | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:83:21:83:26 | ControlFlowNode for unsafe | Untrusted URL redirection depends on a $@. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
120150
| test.py:93:18:93:26 | ControlFlowNode for untrusted | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:93:18:93:26 | ControlFlowNode for untrusted | Untrusted URL redirection depends on a $@. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
151+
| test.py:114:25:114:33 | ControlFlowNode for untrusted | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:114:25:114:33 | ControlFlowNode for untrusted | Untrusted URL redirection depends on a $@. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
152+
| test.py:140:25:140:33 | ControlFlowNode for untrusted | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:140:25:140:33 | ControlFlowNode for untrusted | Untrusted URL redirection depends on a $@. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
153+
| test.py:148:25:148:33 | ControlFlowNode for untrusted | test.py:1:26:1:32 | ControlFlowNode for ImportMember | test.py:148:25:148:33 | ControlFlowNode for untrusted | Untrusted URL redirection depends on a $@. | test.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |

0 commit comments

Comments
 (0)