Skip to content

Commit 42bca1a

Browse files
authored
Merge pull request github#3824 from asger-semmle/js/static-regexp-capture-group-step
Approved by erik-krogh, esbena
2 parents c850938 + 1e5f846 commit 42bca1a

File tree

4 files changed

+126
-0
lines changed

4 files changed

+126
-0
lines changed

javascript/ql/src/semmle/javascript/StandardLibrary.qll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,11 @@ class StringReplaceCall extends DataFlow::MethodCallNode {
121121
*/
122122
DataFlow::Node getRawReplacement() { result = getArgument(1) }
123123

124+
/**
125+
* Gets a function flowing into the second argument of this call to `replace`.
126+
*/
127+
DataFlow::FunctionNode getReplacementCallback() { result = getCallback(1) }
128+
124129
/**
125130
* Holds if this is a global replacement, that is, the first argument is a regular expression
126131
* with the `g` flag.

javascript/ql/src/semmle/javascript/dataflow/TaintTracking.qll

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,74 @@ module TaintTracking {
705705
}
706706
}
707707

708+
private module RegExpCaptureSteps {
709+
/** Gets a reference to a string derived from the most recent RegExp match, such as `RegExp.$1`. */
710+
private DataFlow::PropRead getAStaticCaptureRef() {
711+
result =
712+
DataFlow::globalVarRef("RegExp")
713+
.getAPropertyRead(["$" + [1 .. 9], "input", "lastMatch", "leftContext", "rightContext",
714+
"$&", "$^", "$`"])
715+
}
716+
717+
/**
718+
* Gets a control-flow node where `input` is used in a RegExp match.
719+
*/
720+
private ControlFlowNode getACaptureSetter(DataFlow::Node input) {
721+
exists(DataFlow::MethodCallNode call | result = call.asExpr() |
722+
call.getMethodName() = ["search", "replace", "match"] and input = call.getReceiver()
723+
or
724+
call.getMethodName() = ["test", "exec"] and input = call.getArgument(0)
725+
)
726+
}
727+
728+
/**
729+
* Gets a control-flow node that can locally reach the given static capture reference
730+
* without passing through a capture setter.
731+
*
732+
* This is essentially an intraprocedural def-use analysis that ignores potential
733+
* side effects from calls.
734+
*/
735+
private ControlFlowNode getANodeReachingCaptureRef(DataFlow::PropRead read) {
736+
result = read.asExpr() and
737+
read = getAStaticCaptureRef()
738+
or
739+
exists(ControlFlowNode mid |
740+
result = getANodeReachingCaptureRefAux(read, mid) and
741+
not mid = getACaptureSetter(_)
742+
)
743+
}
744+
745+
pragma[nomagic]
746+
private ControlFlowNode getANodeReachingCaptureRefAux(
747+
DataFlow::PropRead read, ControlFlowNode mid
748+
) {
749+
mid = getANodeReachingCaptureRef(read) and
750+
result = mid.getAPredecessor()
751+
}
752+
753+
/**
754+
* Holds if there is a step `pred -> succ` from the input of a RegExp match to
755+
* a static property of `RegExp`.
756+
*/
757+
private predicate staticRegExpCaptureStep(DataFlow::Node pred, DataFlow::Node succ) {
758+
getACaptureSetter(pred) = getANodeReachingCaptureRef(succ)
759+
or
760+
exists(StringReplaceCall replace |
761+
getANodeReachingCaptureRef(succ) = replace.getReplacementCallback().getFunction().getEntry() and
762+
pred = replace.getReceiver()
763+
)
764+
}
765+
766+
private class StaticRegExpCaptureStep extends AdditionalTaintStep {
767+
StaticRegExpCaptureStep() { staticRegExpCaptureStep(this, _) }
768+
769+
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
770+
pred = this and
771+
staticRegExpCaptureStep(this, succ)
772+
}
773+
}
774+
}
775+
708776
/**
709777
* A conditional checking a tainted string against a regular expression, which is
710778
* considered to be a sanitizer for all configurations.

javascript/ql/test/library-tests/TaintTracking/BasicTaintTracking.expected

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,14 @@ typeInferenceMismatch
103103
| spread.js:2:15:2:22 | source() | spread.js:5:8:5:43 | { f: 'h ... orld' } |
104104
| spread.js:2:15:2:22 | source() | spread.js:7:8:7:19 | [ ...taint ] |
105105
| spread.js:2:15:2:22 | source() | spread.js:8:8:8:28 | [ 1, 2, ... nt, 3 ] |
106+
| static-capture-groups.js:2:17:2:24 | source() | static-capture-groups.js:5:14:5:22 | RegExp.$1 |
107+
| static-capture-groups.js:2:17:2:24 | source() | static-capture-groups.js:15:14:15:22 | RegExp.$1 |
108+
| static-capture-groups.js:2:17:2:24 | source() | static-capture-groups.js:17:14:17:22 | RegExp.$1 |
109+
| static-capture-groups.js:2:17:2:24 | source() | static-capture-groups.js:21:14:21:22 | RegExp.$1 |
110+
| static-capture-groups.js:2:17:2:24 | source() | static-capture-groups.js:24:14:24:22 | RegExp.$1 |
111+
| static-capture-groups.js:2:17:2:24 | source() | static-capture-groups.js:27:14:27:22 | RegExp.$1 |
112+
| static-capture-groups.js:32:17:32:24 | source() | static-capture-groups.js:38:10:38:18 | RegExp.$1 |
113+
| static-capture-groups.js:42:12:42:19 | source() | static-capture-groups.js:43:14:43:22 | RegExp.$1 |
106114
| thisAssignments.js:4:17:4:24 | source() | thisAssignments.js:5:10:5:18 | obj.field |
107115
| thisAssignments.js:7:19:7:26 | source() | thisAssignments.js:8:10:8:20 | this.field2 |
108116
| tst.js:2:13:2:20 | source() | tst.js:4:10:4:10 | x |
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
function test(x) {
2+
let taint = source();
3+
4+
if (/Hello (.*)/.exec(taint)) {
5+
sink(RegExp.$1); // NOT OK
6+
}
7+
8+
if (/Foo (.*)/.exec(x)) {
9+
sink(RegExp.$1); // OK
10+
} else {
11+
sink(RegExp.$1); // NOT OK [INCONSISTENCY] - previous capture group remains
12+
}
13+
14+
if (/Hello ([a-zA-Z]+)/.exec(taint)) {
15+
sink(RegExp.$1); // OK [INCONSISTENCY] - capture group is sanitized
16+
} else {
17+
sink(RegExp.$1); // NOT OK [found but for the wrong reason] - original capture group possibly remains
18+
}
19+
20+
if (/Hello (.*)/.exec(taint) && something()) {
21+
sink(RegExp.$1); // NOT OK
22+
}
23+
if (something() && /Hello (.*)/.exec(taint)) {
24+
sink(RegExp.$1); // NOT OK
25+
}
26+
if (/First (.*)/.exec(taint) || /Second (.*)/.exec(taint)) {
27+
sink(RegExp.$1); // NOT OK
28+
}
29+
}
30+
31+
function test2(x) {
32+
var taint = source();
33+
if (something()) {
34+
if (/Hello (.*)/.exec(taint)) {
35+
something();
36+
}
37+
}
38+
sink(RegExp.$1); // NOT OK
39+
}
40+
41+
function replaceCallback() {
42+
return source().replace(/(\w+)/, () => {
43+
sink(RegExp.$1); // NOT OK
44+
});
45+
}

0 commit comments

Comments
 (0)