Skip to content

Commit 5e4c090

Browse files
authored
Merge pull request github#17412 from asgerf/jss/array-index-constant
JS: Fix handling of constant array index reads, and fix the fallout
2 parents 094112c + 7ba6995 commit 5e4c090

File tree

25 files changed

+333
-278
lines changed

25 files changed

+333
-278
lines changed

javascript/ql/lib/semmle/javascript/dataflow/TaintTracking.qll

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ module TaintTracking {
255255
exists(StringSplitCall c |
256256
c.getBaseString().getALocalSource() =
257257
[DOM::locationRef(), DOM::locationRef().getAPropertyRead("href")] and
258-
c.getSeparator() = "?" and
258+
c.getSeparator() = ["?", "#"] and
259259
read = c.getAPropertyRead("0")
260260
)
261261
}
@@ -356,6 +356,16 @@ module TaintTracking {
356356
}
357357
}
358358

359+
private class LegacySplitTaintStep extends LegacyTaintStep {
360+
override predicate stringManipulationStep(DataFlow::Node pred, DataFlow::Node target) {
361+
exists(DataFlow::MethodCallNode call |
362+
call.getMethodName() = "split" and
363+
pred = call.getReceiver() and
364+
target = call
365+
)
366+
}
367+
}
368+
359369
/**
360370
* A taint propagating data flow edge arising from string manipulation
361371
* functions defined in the standard library.
@@ -372,9 +382,8 @@ module TaintTracking {
372382
[
373383
"anchor", "big", "blink", "bold", "concat", "fixed", "fontcolor", "fontsize",
374384
"italics", "link", "padEnd", "padStart", "repeat", "replace", "replaceAll", "slice",
375-
"small", "split", "strike", "sub", "substr", "substring", "sup",
376-
"toLocaleLowerCase", "toLocaleUpperCase", "toLowerCase", "toUpperCase", "trim",
377-
"trimLeft", "trimRight"
385+
"small", "strike", "sub", "substr", "substring", "sup", "toLocaleLowerCase",
386+
"toLocaleUpperCase", "toLowerCase", "toUpperCase", "trim", "trimLeft", "trimRight"
378387
]
379388
or
380389
// sorted, interesting, properties of Object.prototype

javascript/ql/lib/semmle/javascript/dataflow/internal/Contents.qll

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -93,14 +93,20 @@ module Private {
9393
// than an ordinary content component. These special content sets should never appear in a step.
9494
MkAwaited() or
9595
MkAnyPropertyDeep() or
96-
MkArrayElementDeep()
96+
MkArrayElementDeep() or
97+
MkOptionalStep(string name) { isAccessPathTokenPresent("OptionalStep", name) } or
98+
MkOptionalBarrier(string name) { isAccessPathTokenPresent("OptionalBarrier", name) }
9799

98100
/**
99101
* Holds if `cs` is used to encode a special operation as a content component, but should not
100102
* be treated as an ordinary content component.
101103
*/
102104
predicate isSpecialContentSet(ContentSet cs) {
103-
cs = MkAwaited() or cs = MkAnyPropertyDeep() or cs = MkArrayElementDeep()
105+
cs = MkAwaited() or
106+
cs = MkAnyPropertyDeep() or
107+
cs = MkArrayElementDeep() or
108+
cs instanceof MkOptionalStep or
109+
cs instanceof MkOptionalBarrier
104110
}
105111
}
106112

@@ -254,14 +260,8 @@ module Public {
254260
/** Gets the singleton content to be accessed. */
255261
Content asSingleton() { this = MkSingletonContent(result) }
256262

257-
/** Gets the property name to be accessed. */
258-
PropertyName asPropertyName() {
259-
// TODO: array indices should be mapped to a ContentSet that also reads from UnknownArrayElement
260-
result = this.asSingleton().asPropertyName()
261-
}
262-
263-
/** Gets the array index to be accessed. */
264-
int asArrayIndex() { result = this.asSingleton().asArrayIndex() }
263+
/** Gets the property name to be accessed, provided that this is a singleton content set. */
264+
PropertyName asPropertyName() { result = this.asSingleton().asPropertyName() }
265265

266266
/**
267267
* Gets a string representation of this content set.
@@ -294,6 +294,16 @@ module Public {
294294
or
295295
this = MkAnyCapturedContent() and
296296
result = "AnyCapturedContent"
297+
or
298+
exists(string name |
299+
this = MkOptionalStep(name) and
300+
result = "OptionalStep[" + name + "]"
301+
)
302+
or
303+
exists(string name |
304+
this = MkOptionalBarrier(name) and
305+
result = "OptionalBarrier[" + name + "]"
306+
)
297307
}
298308
}
299309

javascript/ql/lib/semmle/javascript/dataflow/internal/DataFlowPrivate.qll

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,6 +1035,11 @@ predicate simpleLocalFlowStep(Node node1, Node node2) {
10351035
FlowSummaryPrivate::Steps::summaryReadStep(input, MkAwaited(), output) and
10361036
node1 = TFlowSummaryNode(input) and
10371037
node2 = TFlowSummaryNode(output)
1038+
or
1039+
// Add flow through optional barriers. This step is then blocked by the barrier for queries that choose to use the barrier.
1040+
FlowSummaryPrivate::Steps::summaryReadStep(input, MkOptionalBarrier(_), output) and
1041+
node1 = TFlowSummaryNode(input) and
1042+
node2 = TFlowSummaryNode(output)
10381043
)
10391044
or
10401045
VariableCaptureOutput::localFlowStep(getClosureNode(node1), getClosureNode(node2))
@@ -1103,7 +1108,12 @@ predicate readStep(Node node1, ContentSet c, Node node2) {
11031108
node1 = read.getBase() and
11041109
node2 = read
11051110
|
1106-
c.asPropertyName() = read.getPropertyName()
1111+
exists(PropertyName name | read.getPropertyName() = name |
1112+
not exists(name.asArrayIndex()) and
1113+
c = ContentSet::property(name)
1114+
or
1115+
c = ContentSet::arrayElementKnown(name.asArrayIndex())
1116+
)
11071117
or
11081118
not exists(read.getPropertyName()) and
11091119
c = ContentSet::arrayElement()
@@ -1157,7 +1167,7 @@ predicate readStep(Node node1, ContentSet c, Node node2) {
11571167
node2 = TRestParameterStoreNode(function, content)
11581168
|
11591169
// shift known array indices
1160-
c.asArrayIndex() = content.asArrayIndex() + restIndex
1170+
c.asSingleton().asArrayIndex() = content.asArrayIndex() + restIndex
11611171
or
11621172
content.isUnknownArrayElement() and // TODO: don't read unknown array elements from static array
11631173
c = ContentSet::arrayElementUnknown()
@@ -1174,7 +1184,7 @@ predicate readStep(Node node1, ContentSet c, Node node2) {
11741184
c = ContentSet::arrayElement() and // unknown start index when not the first spread operator
11751185
storeContent.isUnknownArrayElement()
11761186
else (
1177-
storeContent.asArrayIndex() = n + c.asArrayIndex()
1187+
storeContent.asArrayIndex() = n + c.asSingleton().asArrayIndex()
11781188
or
11791189
storeContent.isUnknownArrayElement() and c.asSingleton() = storeContent
11801190
)
@@ -1185,7 +1195,7 @@ predicate readStep(Node node1, ContentSet c, Node node2) {
11851195
node1 = TFlowSummaryDynamicParameterArrayNode(parameter.getSummarizedCallable()) and
11861196
node2 = parameter and
11871197
(
1188-
c.asArrayIndex() = pos.asPositional()
1198+
c.asSingleton().asArrayIndex() = pos.asPositional()
11891199
or
11901200
c = ContentSet::arrayElementLowerBound(pos.asPositionalLowerBound())
11911201
)
@@ -1256,7 +1266,7 @@ predicate storeStep(Node node1, ContentSet c, Node node2) {
12561266
exists(InvokeExpr invoke, int n |
12571267
node1 = TValueNode(invoke.getArgument(n)) and
12581268
node2 = TStaticArgumentArrayNode(invoke) and
1259-
c.asArrayIndex() = n and
1269+
c.asSingleton().asArrayIndex() = n and
12601270
not n >= firstSpreadArgumentIndex(invoke)
12611271
)
12621272
or
@@ -1384,3 +1394,20 @@ class ArgumentNode extends DataFlow::Node {
13841394
class ParameterNode extends DataFlow::Node {
13851395
ParameterNode() { isParameterNodeImpl(this, _, _) }
13861396
}
1397+
1398+
cached
1399+
private module OptionalSteps {
1400+
cached
1401+
predicate optionalStep(Node node1, string name, Node node2) {
1402+
FlowSummaryPrivate::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(),
1403+
MkOptionalStep(name), node2.(FlowSummaryNode).getSummaryNode())
1404+
}
1405+
1406+
cached
1407+
predicate optionalBarrier(Node node, string name) {
1408+
FlowSummaryPrivate::Steps::summaryReadStep(_, MkOptionalBarrier(name),
1409+
node.(FlowSummaryNode).getSummaryNode())
1410+
}
1411+
}
1412+
1413+
import OptionalSteps

javascript/ql/lib/semmle/javascript/dataflow/internal/FlowSummaryPrivate.qll

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,10 @@ private string encodeContentAux(ContentSet cs, string arg) {
9595
cs = MkAnyPropertyDeep() and result = "AnyMemberDeep" and arg = ""
9696
or
9797
cs = MkArrayElementDeep() and result = "ArrayElementDeep" and arg = ""
98+
or
99+
cs = MkOptionalStep(arg) and result = "OptionalStep"
100+
or
101+
cs = MkOptionalBarrier(arg) and result = "OptionalBarrier"
98102
}
99103

100104
/**
@@ -122,10 +126,7 @@ string encodeArgumentPosition(ArgumentPosition pos) {
122126
}
123127

124128
/** Gets the return kind corresponding to specification `"ReturnValue"`. */
125-
ReturnKind getStandardReturnValueKind() { result = MkNormalReturnKind() }
126-
127-
/** Gets the return kind corresponding to specification `"ReturnValue"`. */
128-
MkNormalReturnKind getReturnValueKind() { any() }
129+
ReturnKind getStandardReturnValueKind() { result = MkNormalReturnKind() and Stage::ref() }
129130

130131
private module FlowSummaryStepInput implements Private::StepsInputSig {
131132
DataFlowCall getACall(SummarizedCallable sc) {
@@ -237,3 +238,12 @@ ContentSet decodeUnknownWithoutContent(AccessPathSyntax::AccessPathTokenBase tok
237238
*/
238239
bindingset[token]
239240
ContentSet decodeUnknownWithContent(AccessPathSyntax::AccessPathTokenBase token) { none() }
241+
242+
cached
243+
module Stage {
244+
cached
245+
predicate ref() { 1 = 1 }
246+
247+
cached
248+
predicate backref() { optionalStep(_, _, _) }
249+
}

javascript/ql/lib/semmle/javascript/dataflow/internal/TaintTrackingPrivate.qll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,5 +61,7 @@ predicate defaultTaintSanitizer(DataFlow::Node node) {
6161
bindingset[node]
6262
predicate defaultImplicitTaintRead(DataFlow::Node node, ContentSet c) {
6363
exists(node) and
64-
c = [ContentSet::promiseValue(), ContentSet::arrayElement()]
64+
c = [ContentSet::promiseValue(), ContentSet::arrayElement()] and
65+
// Optional steps are added through isAdditionalFlowStep but we don't want the implicit reads
66+
not optionalStep(node, _, _)
6567
}

javascript/ql/lib/semmle/javascript/internal/flow_summaries/Arrays.qll

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,13 @@ class Shift extends SummarizedCallable {
484484

485485
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
486486
preservesValue = true and
487-
input = "Argument[this].ArrayElement" and
487+
input = "Argument[this].ArrayElement[0]" and
488+
output = "ReturnValue"
489+
or
490+
// ArrayElement[0] in the above summary is not automatically converted to a taint step, so manully add
491+
// one from the array to the return value.
492+
preservesValue = false and
493+
input = "Argument[this]" and
488494
output = "ReturnValue"
489495
}
490496
}

javascript/ql/lib/semmle/javascript/internal/flow_summaries/ForOfLoops.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,10 @@ class ForOfLoopStep extends AdditionalFlowInternal {
4848
) {
4949
exists(ForOfStmt stmt |
5050
pred = getSynthesizedNode(stmt, "for-of-map-key") and
51-
contents.asArrayIndex() = 0
51+
contents.asSingleton().asArrayIndex() = 0
5252
or
5353
pred = getSynthesizedNode(stmt, "for-of-map-value") and
54-
contents.asArrayIndex() = 1
54+
contents.asSingleton().asArrayIndex() = 1
5555
|
5656
succ = DataFlow::lvalueNode(stmt.getLValue())
5757
)

javascript/ql/lib/semmle/javascript/internal/flow_summaries/Strings.qll

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@ class StringSplit extends SummarizedCallable {
5555
StringSplit() { this = "String#split" }
5656

5757
override DataFlow::MethodCallNode getACallSimple() {
58-
result.getMethodName() = "split" and result.getNumArgument() = 1
58+
result.getMethodName() = "split" and
59+
result.getNumArgument() = [1, 2] and
60+
not result.getArgument(0).getStringValue() = ["#", "?"]
5961
}
6062

6163
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
@@ -64,3 +66,36 @@ class StringSplit extends SummarizedCallable {
6466
output = "ReturnValue.ArrayElement"
6567
}
6668
}
69+
70+
/**
71+
* A call of form `x.split("#")` or `x.split("?")`.
72+
*
73+
* These are of special significance when tracking a tainted URL suffix, such as `window.location.href`,
74+
* because the first element of the resulting array should not be considered tainted.
75+
*
76+
* This summary defaults to the same behaviour as the general `.split()` case, but it contains optional steps
77+
* and barriers named `tainted-url-suffix` that should be activated when tracking a tainted URL suffix.
78+
*/
79+
class StringSplitHashOrQuestionMark extends SummarizedCallable {
80+
StringSplitHashOrQuestionMark() { this = "String#split with '#' or '?'" }
81+
82+
override DataFlow::MethodCallNode getACallSimple() {
83+
result.getMethodName() = "split" and
84+
result.getNumArgument() = [1, 2] and
85+
result.getArgument(0).getStringValue() = ["#", "?"]
86+
}
87+
88+
override predicate propagatesFlowExt(string input, string output, boolean preservesValue) {
89+
preservesValue = false and
90+
(
91+
input = "Argument[this].OptionalBarrier[split-url-suffix]" and
92+
output = "ReturnValue.ArrayElement"
93+
or
94+
input = "Argument[this].OptionalStep[split-url-suffix-pre]" and
95+
output = "ReturnValue.ArrayElement[0]"
96+
or
97+
input = "Argument[this].OptionalStep[split-url-suffix-post]" and
98+
output = "ReturnValue.ArrayElement[1]" // TODO: support ArrayElement[1..]
99+
)
100+
}
101+
}

javascript/ql/lib/semmle/javascript/security/TaintedUrlSuffix.qll

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
*/
55

66
import javascript
7+
private import semmle.javascript.dataflow.internal.DataFlowPrivate as DataFlowPrivate
78

89
/**
910
* Provides a flow label for reasoning about URLs with a tainted query and fragment part,
@@ -35,14 +36,15 @@ module TaintedUrlSuffix {
3536
result.getKind().isUrl()
3637
}
3738

38-
/** Holds for `pred -> succ` is a step of form `x -> x.p` */
39-
private predicate isSafeLocationProp(DataFlow::PropRead read) {
40-
// Ignore properties that refer to the scheme, domain, port, auth, or path.
41-
read.getPropertyName() =
42-
[
43-
"protocol", "scheme", "host", "hostname", "domain", "origin", "port", "path", "pathname",
44-
"username", "password", "auth"
45-
]
39+
/**
40+
* Holds if `node` should be a barrier for the given `label`.
41+
*
42+
* This should be used in the `isBarrier` predicate of a configuration that uses the tainted-url-suffix
43+
* label.
44+
*/
45+
predicate isBarrier(Node node, FlowLabel label) {
46+
label = label() and
47+
DataFlowPrivate::optionalBarrier(node, "split-url-suffix")
4648
}
4749

4850
/**
@@ -51,11 +53,17 @@ module TaintedUrlSuffix {
5153
* This handles steps through string operations, promises, URL parsers, and URL accessors.
5254
*/
5355
predicate step(Node src, Node dst, FlowLabel srclbl, FlowLabel dstlbl) {
54-
// Inherit all ordinary taint steps except `x -> x.p` steps
56+
// Transition from tainted-url-suffix to general taint when entering the second array element
57+
// of a split('#') or split('?') array.
58+
//
59+
// x [tainted-url-suffix] --> x.split('#') [array element 1] [taint]
60+
//
61+
// Technically we should also preverse tainted-url-suffix when entering the first array element of such
62+
// a split, but this mostly leads to FPs since we currently don't track if the taint has been through URI-decoding.
63+
// (The query/fragment parts are often URI-decoded in practice, but not the other URL parts are not)
5564
srclbl = label() and
56-
dstlbl = label() and
57-
TaintTracking::AdditionalTaintStep::step(src, dst) and
58-
not isSafeLocationProp(dst)
65+
dstlbl.isTaint() and
66+
DataFlowPrivate::optionalStep(src, "split-url-suffix-post", dst)
5967
or
6068
// Transition from URL suffix to full taint when extracting the query/fragment part.
6169
srclbl = label() and
@@ -70,11 +78,6 @@ module TaintedUrlSuffix {
7078
name = StringOps::substringMethodName() and
7179
not call.getArgument(0).getIntValue() = 0
7280
or
73-
// Split around '#' or '?' and extract the suffix
74-
name = "split" and
75-
call.getArgument(0).getStringValue() = ["#", "?"] and
76-
not exists(call.getAPropertyRead("0")) // Avoid false flow to the prefix
77-
or
7881
// Replace '#' and '?' with nothing
7982
name = "replace" and
8083
call.getArgument(0).getStringValue() = ["#", "?"] and

javascript/ql/lib/semmle/javascript/security/dataflow/ClientSideUrlRedirectCustomizations.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ module ClientSideUrlRedirect {
3535
* hence are only partially user-controlled.
3636
*/
3737
abstract class DocumentUrl extends DataFlow::FlowLabel {
38-
DocumentUrl() { this = "document.url" }
38+
DocumentUrl() { this = "document.url" } // TODO: replace with TaintedUrlSuffix
3939
}
4040

4141
/** A source of remote user input, considered as a flow source for unvalidated URL redirects. */

0 commit comments

Comments
 (0)