Skip to content

Commit 06dd3ab

Browse files
committed
JS: Propagate into RegExp.$x
1 parent 17af8f7 commit 06dd3ab

File tree

3 files changed

+73
-3
lines changed

3 files changed

+73
-3
lines changed

javascript/ql/src/semmle/javascript/dataflow/TaintTracking.qll

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,68 @@ module TaintTracking {
705705
}
706706
}
707707

708+
private module RegExpCaptureSteps {
709+
/** Gets a reference to a string derived from the most recent RegExp match, such as `RegExp.$1`. */
710+
private DataFlow::PropRead getAStaticCaptureRef() {
711+
result =
712+
DataFlow::globalVarRef("RegExp")
713+
.getAPropertyRead(["$" + [1 .. 9], "input", "lastMatch", "leftContext", "rightContext",
714+
"$&", "$^", "$`"])
715+
}
716+
717+
/**
718+
* Gets a control-flow node where `input` is used in a RegExp match.
719+
*/
720+
private ControlFlowNode getACaptureSetter(DataFlow::Node input) {
721+
exists(DataFlow::MethodCallNode call | result = call.asExpr() |
722+
call.getMethodName() = ["search", "replace", "match"] and input = call.getReceiver()
723+
or
724+
call.getMethodName() = ["test", "exec"] and input = call.getArgument(0)
725+
)
726+
}
727+
728+
/**
729+
* Gets a control-flow node that can locally reach the given static capture reference
730+
* without passing through a capture setter.
731+
*
732+
* This is essentially an intraprocedural def-use analysis that ignores potential
733+
* side effects from calls.
734+
*/
735+
private ControlFlowNode getANodeReachingCaptureRef(DataFlow::PropRead read) {
736+
result = read.asExpr() and
737+
read = getAStaticCaptureRef()
738+
or
739+
exists(ControlFlowNode mid |
740+
mid = getANodeReachingCaptureRef(read) and
741+
not mid = getACaptureSetter(_) and
742+
result = mid.getAPredecessor()
743+
)
744+
}
745+
746+
/**
747+
* Holds if there is a step `pred -> succ` from the input of a RegExp match to
748+
* a read of a static property of `RegExp`, such as `RegExp.$1`.
749+
*/
750+
private predicate staticRegExpCaptureStep(DataFlow::Node pred, DataFlow::Node succ) {
751+
getACaptureSetter(pred) = getANodeReachingCaptureRef(succ)
752+
or
753+
exists(DataFlow::MethodCallNode replace |
754+
replace.getMethodName() = "replace" and
755+
getANodeReachingCaptureRef(succ) = replace.getCallback(1).getFunction().getEntry() and
756+
pred = replace.getReceiver()
757+
)
758+
}
759+
760+
private class StaticRegExpCaptureStep extends AdditionalTaintStep {
761+
StaticRegExpCaptureStep() { staticRegExpCaptureStep(this, _) }
762+
763+
override predicate step(DataFlow::Node pred, DataFlow::Node succ) {
764+
pred = this and
765+
staticRegExpCaptureStep(this, succ)
766+
}
767+
}
768+
}
769+
708770
/**
709771
* A conditional checking a tainted string against a regular expression, which is
710772
* considered to be a sanitizer for all configurations.

javascript/ql/test/library-tests/TaintTracking/BasicTaintTracking.expected

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,14 @@ typeInferenceMismatch
103103
| spread.js:2:15:2:22 | source() | spread.js:5:8:5:43 | { f: 'h ... orld' } |
104104
| spread.js:2:15:2:22 | source() | spread.js:7:8:7:19 | [ ...taint ] |
105105
| spread.js:2:15:2:22 | source() | spread.js:8:8:8:28 | [ 1, 2, ... nt, 3 ] |
106+
| static-capture-groups.js:2:17:2:24 | source() | static-capture-groups.js:5:14:5:22 | RegExp.$1 |
107+
| static-capture-groups.js:2:17:2:24 | source() | static-capture-groups.js:15:14:15:22 | RegExp.$1 |
108+
| static-capture-groups.js:2:17:2:24 | source() | static-capture-groups.js:17:14:17:22 | RegExp.$1 |
109+
| static-capture-groups.js:2:17:2:24 | source() | static-capture-groups.js:21:14:21:22 | RegExp.$1 |
110+
| static-capture-groups.js:2:17:2:24 | source() | static-capture-groups.js:24:14:24:22 | RegExp.$1 |
111+
| static-capture-groups.js:2:17:2:24 | source() | static-capture-groups.js:27:14:27:22 | RegExp.$1 |
112+
| static-capture-groups.js:32:17:32:24 | source() | static-capture-groups.js:38:10:38:18 | RegExp.$1 |
113+
| static-capture-groups.js:42:12:42:19 | source() | static-capture-groups.js:43:14:43:22 | RegExp.$1 |
106114
| thisAssignments.js:4:17:4:24 | source() | thisAssignments.js:5:10:5:18 | obj.field |
107115
| thisAssignments.js:7:19:7:26 | source() | thisAssignments.js:8:10:8:20 | this.field2 |
108116
| tst.js:2:13:2:20 | source() | tst.js:4:10:4:10 | x |

javascript/ql/test/library-tests/TaintTracking/static-capture-groups.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@ function test(x) {
88
if (/Foo (.*)/.exec(x)) {
99
sink(RegExp.$1); // OK
1010
} else {
11-
sink(RegExp.$1); // NOT OK - previous capture group remains
11+
sink(RegExp.$1); // NOT OK [INCONSISTENCY] - previous capture group remains
1212
}
1313

1414
if (/Hello ([a-zA-Z]+)/.exec(taint)) {
15-
sink(RegExp.$1); // OK - capture group is sanitized
15+
sink(RegExp.$1); // OK [INCONSISTENCY] - capture group is sanitized
1616
} else {
17-
sink(RegExp.$1); // NOT OK - original capture group possibly remains
17+
sink(RegExp.$1); // NOT OK [found but for the wrong reason] - original capture group possibly remains
1818
}
1919

2020
if (/Hello (.*)/.exec(taint) && something()) {

0 commit comments

Comments
 (0)