Skip to content

Commit a53d294

Browse files
authored
Merge pull request github#18203 from asgerf/jss/document-url
JS: Use TaintedUrlSuffix in ClientSideUrlRedirect
2 parents f8abc5a + 97b78e7 commit a53d294

File tree

18 files changed

+486
-235
lines changed

18 files changed

+486
-235
lines changed

javascript/ql/lib/semmle/javascript/dataflow/Nodes.qll

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1611,7 +1611,12 @@ class RegExpConstructorInvokeNode extends DataFlow::InvokeNode {
16111611
* Gets the AST of the regular expression created here, provided that the
16121612
* first argument is a string literal.
16131613
*/
1614-
RegExpTerm getRoot() { result = this.getArgument(0).asExpr().(StringLiteral).asRegExp() }
1614+
RegExpTerm getRoot() {
1615+
result = this.getArgument(0).asExpr().(StringLiteral).asRegExp()
1616+
or
1617+
// In case someone writes `new RegExp(/foo/)` for some reason
1618+
result = this.getArgument(0).asExpr().(RegExpLiteral).getRoot()
1619+
}
16151620

16161621
/**
16171622
* Gets the flags provided in the second argument, or an empty string if no

javascript/ql/lib/semmle/javascript/security/TaintedUrlSuffix.qll

Lines changed: 3 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -4,114 +4,15 @@
44
*/
55

66
import javascript
7-
private import semmle.javascript.dataflow.internal.DataFlowPrivate as DataFlowPrivate
87

98
/**
109
* Provides a flow label for reasoning about URLs with a tainted query and fragment part,
1110
* which we collectively refer to as the "suffix" of the URL.
1211
*/
1312
module TaintedUrlSuffix {
14-
private import DataFlow
13+
import TaintedUrlSuffixCustomizations::TaintedUrlSuffix
1514

16-
/**
17-
* The flow label representing a URL with a tainted query and fragment part.
18-
*
19-
* Can also be accessed using `TaintedUrlSuffix::label()`.
20-
*/
21-
class TaintedUrlSuffixLabel extends FlowLabel {
22-
TaintedUrlSuffixLabel() { this = "tainted-url-suffix" }
23-
}
24-
25-
/**
26-
* Gets the flow label representing a URL with a tainted query and fragment part.
27-
*/
28-
FlowLabel label() { result instanceof TaintedUrlSuffixLabel }
29-
30-
/** Gets a remote flow source that is a tainted URL query or fragment part from `window.location`. */
31-
ClientSideRemoteFlowSource source() {
32-
result = DOM::locationRef().getAPropertyRead(["search", "hash"])
33-
or
34-
result = DOM::locationSource()
35-
or
36-
result.getKind().isUrl()
37-
}
38-
39-
/**
40-
* Holds if `node` should be a barrier for the given `label`.
41-
*
42-
* This should be used in the `isBarrier` predicate of a configuration that uses the tainted-url-suffix
43-
* label.
44-
*/
45-
predicate isBarrier(Node node, FlowLabel label) {
46-
label = label() and
47-
DataFlowPrivate::optionalBarrier(node, "split-url-suffix")
48-
}
49-
50-
/**
51-
* Holds if there is a flow step `src -> dst` involving the URL suffix taint label.
52-
*
53-
* This handles steps through string operations, promises, URL parsers, and URL accessors.
54-
*/
55-
predicate step(Node src, Node dst, FlowLabel srclbl, FlowLabel dstlbl) {
56-
// Transition from tainted-url-suffix to general taint when entering the second array element
57-
// of a split('#') or split('?') array.
58-
//
59-
// x [tainted-url-suffix] --> x.split('#') [array element 1] [taint]
60-
//
61-
// Technically we should also preverse tainted-url-suffix when entering the first array element of such
62-
// a split, but this mostly leads to FPs since we currently don't track if the taint has been through URI-decoding.
63-
// (The query/fragment parts are often URI-decoded in practice, but not the other URL parts are not)
64-
srclbl = label() and
65-
dstlbl.isTaint() and
66-
DataFlowPrivate::optionalStep(src, "split-url-suffix-post", dst)
67-
or
68-
// Transition from URL suffix to full taint when extracting the query/fragment part.
69-
srclbl = label() and
70-
dstlbl.isTaint() and
71-
(
72-
exists(MethodCallNode call, string name |
73-
src = call.getReceiver() and
74-
dst = call and
75-
name = call.getMethodName()
76-
|
77-
// Substring that is not a prefix
78-
name = StringOps::substringMethodName() and
79-
not call.getArgument(0).getIntValue() = 0
80-
or
81-
// Replace '#' and '?' with nothing
82-
name = "replace" and
83-
call.getArgument(0).getStringValue() = ["#", "?"] and
84-
call.getArgument(1).getStringValue() = ""
85-
or
86-
// The `get` call in `url.searchParams.get(x)` and `url.hashParams.get(x)`
87-
// The step should be safe since nothing else reachable by this flow label supports a method named 'get'.
88-
name = "get"
89-
or
90-
// Methods on URL objects from the Closure library
91-
name = "getDecodedQuery"
92-
or
93-
name = "getFragment"
94-
or
95-
name = "getParameterValue"
96-
or
97-
name = "getParameterValues"
98-
or
99-
name = "getQueryData"
100-
)
101-
or
102-
exists(PropRead read |
103-
src = read.getBase() and
104-
dst = read and
105-
// Unlike the `search` property, the `query` property from `url.parse` does not include the `?`.
106-
read.getPropertyName() = "query"
107-
)
108-
or
109-
// Assume calls to regexp.exec always extract query/fragment parameters.
110-
exists(MethodCallNode call |
111-
call = any(RegExpLiteral re).flow().(DataFlow::SourceNode).getAMethodCall("exec") and
112-
src = call.getArgument(0) and
113-
dst = call
114-
)
115-
)
15+
private class ConcreteTaintedUrlSuffixLabel extends TaintedUrlSuffixLabel {
16+
ConcreteTaintedUrlSuffixLabel() { this = this }
11617
}
11718
}
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
/**
2+
* Provides a flow label for reasoning about URLs with a tainted query and fragment part,
3+
* which we collectively refer to as the "suffix" of the URL.
4+
*/
5+
6+
import javascript
7+
private import semmle.javascript.dataflow.internal.DataFlowPrivate as DataFlowPrivate
8+
9+
/**
10+
* Provides a flow label for reasoning about URLs with a tainted query and fragment part,
11+
* which we collectively refer to as the "suffix" of the URL.
12+
*/
13+
module TaintedUrlSuffix {
14+
private import DataFlow
15+
16+
/**
17+
* The flow label representing a URL with a tainted query and fragment part.
18+
*
19+
* Can also be accessed using `TaintedUrlSuffix::label()`.
20+
*/
21+
abstract class TaintedUrlSuffixLabel extends FlowLabel {
22+
TaintedUrlSuffixLabel() { this = "tainted-url-suffix" }
23+
}
24+
25+
/**
26+
* Gets the flow label representing a URL with a tainted query and fragment part.
27+
*/
28+
FlowLabel label() { result instanceof TaintedUrlSuffixLabel }
29+
30+
/** Gets a remote flow source that is a tainted URL query or fragment part from `window.location`. */
31+
ClientSideRemoteFlowSource source() {
32+
result = DOM::locationRef().getAPropertyRead(["search", "hash"])
33+
or
34+
result = DOM::locationSource()
35+
or
36+
result.getKind().isUrl()
37+
}
38+
39+
/**
40+
* Holds if `node` should be a barrier for the given `label`.
41+
*
42+
* This should be used in the `isBarrier` predicate of a configuration that uses the tainted-url-suffix
43+
* label.
44+
*/
45+
predicate isBarrier(Node node, FlowLabel label) {
46+
label = label() and
47+
DataFlowPrivate::optionalBarrier(node, "split-url-suffix")
48+
}
49+
50+
/**
51+
* Holds if there is a flow step `src -> dst` involving the URL suffix taint label.
52+
*
53+
* This handles steps through string operations, promises, URL parsers, and URL accessors.
54+
*/
55+
predicate step(Node src, Node dst, FlowLabel srclbl, FlowLabel dstlbl) {
56+
// Transition from tainted-url-suffix to general taint when entering the second array element
57+
// of a split('#') or split('?') array.
58+
//
59+
// x [tainted-url-suffix] --> x.split('#') [array element 1] [taint]
60+
//
61+
// Technically we should also preverse tainted-url-suffix when entering the first array element of such
62+
// a split, but this mostly leads to FPs since we currently don't track if the taint has been through URI-decoding.
63+
// (The query/fragment parts are often URI-decoded in practice, but not the other URL parts are not)
64+
srclbl = label() and
65+
dstlbl.isTaint() and
66+
DataFlowPrivate::optionalStep(src, "split-url-suffix-post", dst)
67+
or
68+
// Transition from URL suffix to full taint when extracting the query/fragment part.
69+
srclbl = label() and
70+
dstlbl.isTaint() and
71+
(
72+
exists(MethodCallNode call, string name |
73+
src = call.getReceiver() and
74+
dst = call and
75+
name = call.getMethodName()
76+
|
77+
// Substring that is not a prefix
78+
name = StringOps::substringMethodName() and
79+
not call.getArgument(0).getIntValue() = 0
80+
or
81+
// Replace '#' and '?' with nothing
82+
name = "replace" and
83+
call.getArgument(0).getStringValue() = ["#", "?"] and
84+
call.getArgument(1).getStringValue() = ""
85+
or
86+
// The `get` call in `url.searchParams.get(x)` and `url.hashParams.get(x)`
87+
// The step should be safe since nothing else reachable by this flow label supports a method named 'get'.
88+
name = "get"
89+
or
90+
// Methods on URL objects from the Closure library
91+
name = "getDecodedQuery"
92+
or
93+
name = "getFragment"
94+
or
95+
name = "getParameterValue"
96+
or
97+
name = "getParameterValues"
98+
or
99+
name = "getQueryData"
100+
)
101+
or
102+
exists(PropRead read |
103+
src = read.getBase() and
104+
dst = read and
105+
// Unlike the `search` property, the `query` property from `url.parse` does not include the `?`.
106+
read.getPropertyName() = "query"
107+
)
108+
or
109+
exists(MethodCallNode call, DataFlow::RegExpCreationNode re |
110+
(
111+
call = re.getAMethodCall("exec") and
112+
src = call.getArgument(0) and
113+
dst = call
114+
or
115+
call.getMethodName() = ["match", "matchAll"] and
116+
re.flowsTo(call.getArgument(0)) and
117+
src = call.getReceiver() and
118+
dst = call
119+
)
120+
|
121+
captureAfterSuffixIndicator(re.getRoot().getAChild*())
122+
or
123+
// If the regexp is unknown, assume it will extract the URL suffix
124+
not exists(re.getRoot())
125+
)
126+
)
127+
}
128+
129+
/** Holds if the `n`th child of `seq` contains a character indicating that everything thereafter is part of the suffix */
130+
private predicate containsSuffixIndicator(RegExpSequence seq, int n) {
131+
// Also include '=' as it usually only appears in the URL suffix
132+
seq.getChild(n).getAChild*().(RegExpConstant).getValue().regexpMatch(".*[?#=].*")
133+
}
134+
135+
/** Holds if the `n`th child of `seq` contains a capture group. */
136+
private predicate containsCaptureGroup(RegExpSequence seq, int n) {
137+
seq.getChild(n).getAChild*().(RegExpGroup).isCapture()
138+
}
139+
140+
/**
141+
* Holds if `seq` contains a capture group that will likely match path of the URL suffix,
142+
* thereby extracting tainted data.
143+
*
144+
* For example, `/#(.*)/.exec(url)` will extract the tainted URL suffix from `url`.
145+
*/
146+
private predicate captureAfterSuffixIndicator(RegExpSequence seq) {
147+
exists(int suffix, int capture |
148+
containsSuffixIndicator(seq, suffix) and
149+
containsCaptureGroup(seq, capture) and
150+
suffix < capture
151+
)
152+
}
153+
}

javascript/ql/lib/semmle/javascript/security/dataflow/ClientSideUrlRedirectCustomizations.qll

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
*/
66

77
import javascript
8+
private import semmle.javascript.security.TaintedUrlSuffixCustomizations
89

910
module ClientSideUrlRedirect {
1011
/**
@@ -31,12 +32,12 @@ module ClientSideUrlRedirect {
3132
abstract class Sanitizer extends DataFlow::Node { }
3233

3334
/**
35+
* DEPRECATED. Replaced by functionality from the `TaintedUrlSuffix` library.
36+
*
3437
* A flow label for values that represent the URL of the current document, and
3538
* hence are only partially user-controlled.
3639
*/
37-
abstract class DocumentUrl extends DataFlow::FlowLabel {
38-
DocumentUrl() { this = "document.url" } // TODO: replace with TaintedUrlSuffix
39-
}
40+
deprecated class DocumentUrl = TaintedUrlSuffix::TaintedUrlSuffixLabel;
4041

4142
/**
4243
* DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead!
@@ -50,8 +51,8 @@ module ClientSideUrlRedirect {
5051
ActiveThreatModelSourceAsSource() { not this.(ClientSideRemoteFlowSource).getKind().isPath() }
5152

5253
override DataFlow::FlowLabel getAFlowLabel() {
53-
if this.(ClientSideRemoteFlowSource).getKind().isUrl()
54-
then result instanceof DocumentUrl
54+
if this = TaintedUrlSuffix::source()
55+
then result = TaintedUrlSuffix::label()
5556
else result.isTaint()
5657
}
5758
}
@@ -60,7 +61,7 @@ module ClientSideUrlRedirect {
6061
* Holds if `node` extracts a part of a URL that does not contain the suffix.
6162
*/
6263
pragma[inline]
63-
predicate isPrefixExtraction(DataFlow::MethodCallNode node) {
64+
deprecated predicate isPrefixExtraction(DataFlow::MethodCallNode node) {
6465
// Block flow through prefix-extraction `substring(0, ...)` and `split("#")[0]`
6566
node.getMethodName() = [StringOps::substringMethodName(), "split"] and
6667
not untrustedUrlSubstring(_, node)
@@ -70,7 +71,7 @@ module ClientSideUrlRedirect {
7071
* Holds if `substring` refers to a substring of `base` which is considered untrusted
7172
* when `base` is the current URL.
7273
*/
73-
predicate untrustedUrlSubstring(DataFlow::Node base, DataFlow::Node substring) {
74+
deprecated predicate untrustedUrlSubstring(DataFlow::Node base, DataFlow::Node substring) {
7475
exists(DataFlow::MethodCallNode mcn, string methodName |
7576
mcn = substring and mcn.calls(base, methodName)
7677
|

javascript/ql/lib/semmle/javascript/security/dataflow/ClientSideUrlRedirectQuery.qll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
import javascript
1111
import UrlConcatenation
1212
import ClientSideUrlRedirectCustomizations::ClientSideUrlRedirect
13+
import semmle.javascript.security.TaintedUrlSuffix
1314

1415
// Materialize flow labels
15-
private class ConcreteDocumentUrl extends DocumentUrl {
16+
deprecated private class ConcreteDocumentUrl extends DocumentUrl {
1617
ConcreteDocumentUrl() { this = this }
1718
}
1819

@@ -35,8 +36,7 @@ module ClientSideUrlRedirectConfig implements DataFlow::StateConfigSig {
3536
}
3637

3738
predicate isBarrier(DataFlow::Node node, DataFlow::FlowLabel state) {
38-
isPrefixExtraction(node) and
39-
state instanceof DocumentUrl
39+
TaintedUrlSuffix::isBarrier(node, state)
4040
}
4141

4242
predicate isBarrierOut(DataFlow::Node node) { hostnameSanitizingPrefixEdge(node, _) }
@@ -47,9 +47,7 @@ module ClientSideUrlRedirectConfig implements DataFlow::StateConfigSig {
4747
DataFlow::Node node1, DataFlow::FlowLabel state1, DataFlow::Node node2,
4848
DataFlow::FlowLabel state2
4949
) {
50-
untrustedUrlSubstring(node1, node2) and
51-
state1 instanceof DocumentUrl and
52-
state2.isTaint()
50+
TaintedUrlSuffix::step(node1, node2, state1, state2)
5351
or
5452
exists(HtmlSanitizerCall call |
5553
node1 = call.getInput() and

0 commit comments

Comments
 (0)