Skip to content

Commit fa1a6ee

Browse files
committed
JS: Add StringOps::RegExpTest
1 parent 7265e94 commit fa1a6ee

File tree

4 files changed

+220
-0
lines changed

4 files changed

+220
-0
lines changed

javascript/ql/src/semmle/javascript/StringOps.qll

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -629,4 +629,143 @@ module StringOps {
629629
class HtmlConcatenationLeaf extends ConcatenationLeaf {
630630
HtmlConcatenationLeaf() { getRoot() instanceof HtmlConcatenationRoot }
631631
}
632+
633+
/**
634+
* A data flow node whose boolean value indicates whether a regexp matches a given string.
635+
*
636+
* For example, the condition of each of the following `if`-statements is a `RegExpTest` node:
637+
* ```js
638+
* if (regexp.test(str)) { ... }
639+
* if (regexp.exec(str) != null) { ... }
640+
* if (str.matches(regexp)) { ... }
641+
* ```
642+
*
643+
* Note that `RegExpTest` represents a boolean-valued expression or one
644+
* that is coerced to a boolean, which is not always the same as the call that performs the
645+
* regexp-matching. For example, the `exec` call below is not itself a `RegExpTest`,
646+
* but the `match` variable in the condition is:
647+
* ```js
648+
* let match = regexp.exec(str);
649+
* if (!match) { ... } // <--- 'match' is the RegExpTest
650+
* ```
651+
*/
652+
class RegExpTest extends DataFlow::Node {
653+
// The underlying `Range` node; the member predicates below read their results from it.
RegExpTest::Range range;
654+
655+
// A node is a `RegExpTest` exactly when it is a `RegExpTest::Range`.
RegExpTest() { this = range }
656+
657+
/**
658+
* Gets the AST of the regular expression used in the test, if it can be seen locally.
*
* There is no result when neither of the two cases in the body applies.
659+
*/
660+
RegExpTerm getRegExp() {
661+
// Case 1: a local source of the regexp operand is a `DataFlow::RegExpCreationNode`.
result = getRegExpOperand().getALocalSource().(DataFlow::RegExpCreationNode).getRoot()
662+
or
663+
// Case 2: the operand is reported with `coerced = true` and is itself a string literal,
// which is then interpreted as a regexp through `asRegExp()`.
result = range.getRegExpOperand(true).asExpr().(StringLiteral).asRegExp()
664+
}
665+
666+
/**
667+
* Gets the data flow node corresponding to the regular expression object used in the test.
668+
*
669+
* In some cases this represents a string value being coerced to a RegExp object.
670+
*/
671+
DataFlow::Node getRegExpOperand() { result = range.getRegExpOperand(_) }
672+
673+
/**
674+
* Gets the data flow node corresponding to the string being tested against the regular expression.
675+
*/
676+
DataFlow::Node getStringOperand() { result = range.getStringOperand() }
677+
678+
/**
679+
* Gets the return value indicating that the string matched the regular expression.
680+
*
681+
* For example, for `regexp.exec(str) == null`, the polarity is `false`, and for
682+
* `regexp.exec(str) != null` the polarity is `true`.
683+
*/
684+
boolean getPolarity() { result = range.getPolarity() }
685+
}
686+
687+
/**
688+
* Companion module to the `RegExpTest` class.
689+
*/
690+
module RegExpTest {
691+
/**
692+
* A data flow node whose boolean value indicates whether a regexp matches a given string.
693+
*
694+
* This class can be extended to contribute new kinds of `RegExpTest` nodes.
695+
*/
696+
abstract class Range extends DataFlow::Node {
697+
/**
698+
* Gets the data flow node corresponding to the regular expression object used in the test.
*
* `coerced` is `true` if the operand may be a string value that is coerced to a
* RegExp object (as `MatchesCall` reports for its argument), and `false` if the
* operand is used directly as the regexp (as `TestCall` and `ExecTest` report
* for the receiver). `RegExpTest.getRegExp()` only interprets a string literal
* operand as a regexp when `coerced` is `true`.
699+
*/
700+
abstract DataFlow::Node getRegExpOperand(boolean coerced);
701+
702+
/**
703+
* Gets the data flow node corresponding to the string being tested against the regular expression.
704+
*/
705+
abstract DataFlow::Node getStringOperand();
706+
707+
/**
708+
* Gets the return value indicating that the string matched the regular expression.
*
* The default is `true`; a subclass overrides this when a `false` value of the
* node can indicate a match (see `ExecTest`).
709+
*/
710+
boolean getPolarity() { result = true }
711+
}
712+
713+
/**
* A call to a method named `test`, modelled as `regexp.test(str)`: the receiver is
* the regexp operand and the first argument is the string operand. The polarity is
* the default inherited from `Range`.
*
* NOTE(review): only the method name is checked, so every `x.test(y)` call is
* included regardless of what `x` is - confirm that this breadth is intended.
*/
private class TestCall extends Range, DataFlow::MethodCallNode {
714+
TestCall() { getMethodName() = "test" }
715+
716+
// The receiver is used directly as the regexp, hence `coerced = false`.
override DataFlow::Node getRegExpOperand(boolean coerced) { result = getReceiver() and coerced = false }
717+
718+
override DataFlow::Node getStringOperand() { result = getArgument(0) }
719+
}
720+
721+
/**
* A call to a method named `matches`, modelled as `str.matches(regexp)`: the
* receiver is the string operand and the first argument is the regexp operand.
* The polarity is the default inherited from `Range`.
*
* NOTE(review): JavaScript strings provide `match`, not `matches` - confirm that
* `matches` is the method name this class is meant to recognize.
*/
private class MatchesCall extends Range, DataFlow::MethodCallNode {
722+
MatchesCall() { getMethodName() = "matches" }
723+
724+
// `coerced = true`: the argument may be a string, which lets
// `RegExpTest.getRegExp()` parse a string literal argument as a regexp.
override DataFlow::Node getRegExpOperand(boolean coerced) { result = getArgument(0) and coerced = true }
725+
726+
override DataFlow::Node getStringOperand() { result = getReceiver() }
727+
}
728+
729+
/**
* A call to a method named `exec`.
*
* Unlike `TestCall` and `MatchesCall` this class does not extend `Range`, so the call
* is not a `RegExpTest` by itself; `ExecTest` uses it to find the places where the
* call's result is checked.
*/
private class ExecCall extends DataFlow::MethodCallNode {
730+
ExecCall() { getMethodName() = "exec" }
731+
}
732+
733+
/**
* Holds if `e` is the test of a `ConditionGuardNode` or the operand of a
* logical-not expression, that is, `e` occurs where its value is used as a boolean.
*
* NOTE(review): this helper is not declared `private`, unlike its neighbour
* `impliesNotNull` - confirm that it is meant to be exported from `RegExpTest`.
*/
predicate isCoercedToBoolean(Expr e) {
734+
e = any(ConditionGuardNode guard).getTest()
735+
or
736+
e = any(LogNotExpr n).getOperand()
737+
}
738+
739+
/**
740+
* Holds if `e` evaluating to `polarity` implies that `operand` is not null.
*
* Two shapes are recognized:
* - `e` is an equality test between `operand` and a `null` literal; `polarity` is the
*   negation of the test's own polarity (`false` for `x == null`, `true` for `x != null`).
* - `e` is coerced to a boolean and is itself the operand; `polarity` is `true`.
741+
*/
742+
private predicate impliesNotNull(Expr e, Expr operand, boolean polarity) {
743+
exists(EqualityTest test |
744+
e = test and
745+
// Inverted because the comparison is against `null`: the operand is non-null
// when the comparison yields the opposite of the test's polarity.
polarity = test.getPolarity().booleanNot() and
746+
test.hasOperands(any(NullLiteral n), operand)
747+
)
748+
or
749+
// A value used as a boolean: evaluating to true implies it is not null.
isCoercedToBoolean(e) and
750+
operand = e and
751+
polarity = true
752+
}
753+
754+
/**
* An expression whose boolean outcome tells whether the result of an `exec` call is
* non-null, for example `regexp.exec(str) != null`, or `match` in `if (match)` where
* `match` holds the result of `regexp.exec(str)`.
*
* The regexp and string operands are taken from the `exec` call, not from this node.
*/
private class ExecTest extends Range, DataFlow::ValueNode {
755+
// The `exec` call whose result is being checked.
ExecCall exec;
756+
// The value of this node that implies the `exec` result is not null.
boolean polarity;
757+
758+
ExecTest() {
759+
// `use` is an expression that the `exec` call flows to; this node is an expression
// for which `impliesNotNull` holds with `use` as the operand.
exists(Expr use | exec.flowsToExpr(use) |
760+
impliesNotNull(astNode, use, polarity)
761+
)
762+
}
763+
764+
// The receiver of `exec` is used directly as the regexp, hence `coerced = false`.
override DataFlow::Node getRegExpOperand(boolean coerced) { result = exec.getReceiver() and coerced = false }
765+
766+
override DataFlow::Node getStringOperand() { result = exec.getArgument(0) }
767+
768+
override boolean getPolarity() { result = polarity }
769+
}
770+
}
632771
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
regexpTest
2+
| tst.js:6:9:6:28 | /^[a-z]+$/.test(str) |
3+
| tst.js:7:9:7:36 | /^[a-z] ... != null |
4+
| tst.js:8:9:8:28 | /^[a-z]+$/.exec(str) |
5+
| tst.js:9:9:9:31 | str.mat ... -z]+$/) |
6+
| tst.js:10:9:10:31 | str.mat ... -z]+$") |
7+
| tst.js:12:9:12:24 | regexp.test(str) |
8+
| tst.js:13:9:13:32 | regexp. ... != null |
9+
| tst.js:14:9:14:24 | regexp.exec(str) |
10+
| tst.js:15:9:15:27 | str.matches(regexp) |
11+
| tst.js:18:9:18:13 | match |
12+
| tst.js:19:10:19:14 | match |
13+
| tst.js:20:9:20:21 | match == null |
14+
| tst.js:21:9:21:21 | match != null |
15+
| tst.js:22:9:22:13 | match |
16+
| tst.js:25:23:25:27 | match |
17+
| tst.js:29:21:29:36 | regexp.test(str) |
18+
| tst.js:33:21:33:39 | str.matches(regexp) |
19+
#select
20+
| tst.js:6:9:6:28 | /^[a-z]+$/.test(str) | tst.js:6:10:6:17 | ^[a-z]+$ | tst.js:6:9:6:18 | /^[a-z]+$/ | tst.js:6:25:6:27 | str | true |
21+
| tst.js:7:9:7:36 | /^[a-z] ... != null | tst.js:7:10:7:17 | ^[a-z]+$ | tst.js:7:9:7:18 | /^[a-z]+$/ | tst.js:7:25:7:27 | str | true |
22+
| tst.js:8:9:8:28 | /^[a-z]+$/.exec(str) | tst.js:8:10:8:17 | ^[a-z]+$ | tst.js:8:9:8:18 | /^[a-z]+$/ | tst.js:8:25:8:27 | str | true |
23+
| tst.js:9:9:9:31 | str.mat ... -z]+$/) | tst.js:9:22:9:29 | ^[a-z]+$ | tst.js:9:21:9:30 | /^[a-z]+$/ | tst.js:9:9:9:11 | str | true |
24+
| tst.js:10:9:10:31 | str.mat ... -z]+$") | tst.js:10:22:10:29 | ^[a-z]+$ | tst.js:10:21:10:30 | "^[a-z]+$" | tst.js:10:9:10:11 | str | true |
25+
| tst.js:12:9:12:24 | regexp.test(str) | tst.js:3:17:3:24 | ^[a-z]+$ | tst.js:12:9:12:14 | regexp | tst.js:12:21:12:23 | str | true |
26+
| tst.js:13:9:13:32 | regexp. ... != null | tst.js:3:17:3:24 | ^[a-z]+$ | tst.js:13:9:13:14 | regexp | tst.js:13:21:13:23 | str | true |
27+
| tst.js:14:9:14:24 | regexp.exec(str) | tst.js:3:17:3:24 | ^[a-z]+$ | tst.js:14:9:14:14 | regexp | tst.js:14:21:14:23 | str | true |
28+
| tst.js:15:9:15:27 | str.matches(regexp) | tst.js:3:17:3:24 | ^[a-z]+$ | tst.js:15:21:15:26 | regexp | tst.js:15:9:15:11 | str | true |
29+
| tst.js:18:9:18:13 | match | tst.js:3:17:3:24 | ^[a-z]+$ | tst.js:17:17:17:22 | regexp | tst.js:17:29:17:31 | str | true |
30+
| tst.js:19:10:19:14 | match | tst.js:3:17:3:24 | ^[a-z]+$ | tst.js:17:17:17:22 | regexp | tst.js:17:29:17:31 | str | true |
31+
| tst.js:20:9:20:21 | match == null | tst.js:3:17:3:24 | ^[a-z]+$ | tst.js:17:17:17:22 | regexp | tst.js:17:29:17:31 | str | false |
32+
| tst.js:21:9:21:21 | match != null | tst.js:3:17:3:24 | ^[a-z]+$ | tst.js:17:17:17:22 | regexp | tst.js:17:29:17:31 | str | true |
33+
| tst.js:22:9:22:13 | match | tst.js:3:17:3:24 | ^[a-z]+$ | tst.js:17:17:17:22 | regexp | tst.js:17:29:17:31 | str | true |
34+
| tst.js:25:23:25:27 | match | tst.js:3:17:3:24 | ^[a-z]+$ | tst.js:17:17:17:22 | regexp | tst.js:17:29:17:31 | str | true |
35+
| tst.js:29:21:29:36 | regexp.test(str) | tst.js:3:17:3:24 | ^[a-z]+$ | tst.js:29:21:29:26 | regexp | tst.js:29:33:29:35 | str | true |
36+
| tst.js:33:21:33:39 | str.matches(regexp) | tst.js:3:17:3:24 | ^[a-z]+$ | tst.js:33:33:33:38 | regexp | tst.js:33:21:33:23 | str | true |
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import javascript
2+
3+
// Lists every `RegExpTest` node on its own; this is the `regexpTest` section of the expected output.
query StringOps::RegExpTest regexpTest() { any() }
4+
5+
// For each test, shows the regexp AST, the regexp operand, the string operand and the polarity.
// A row is only produced when every selected expression has a value, including `getRegExp()`.
from StringOps::RegExpTest test
6+
select test, test.getRegExp(), test.getRegExpOperand(), test.getStringOperand(), test.getPolarity()
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import 'dummy';
2+
3+
const regexp = /^[a-z]+$/;
4+
5+
function f(str) { // fixture: each statement exercises one shape of regexp test; keep line numbers stable
6+
if (/^[a-z]+$/.test(str)) {} // regexp literal used directly
7+
if (/^[a-z]+$/.exec(str) != null) {}
8+
if (/^[a-z]+$/.exec(str)) {}
9+
if (str.matches(/^[a-z]+$/)) {}
10+
if (str.matches("^[a-z]+$")) {} // string literal operand, interpreted as a regexp
11+
12+
if (regexp.test(str)) {} // same shapes, regexp taken from the top-level constant
13+
if (regexp.exec(str) != null) {}
14+
if (regexp.exec(str)) {}
15+
if (str.matches(regexp)) {}
16+
17+
let match = regexp.exec(str); // the checks on `match` below are the tests, not this call
18+
if (match) {}
19+
if (!match) {}
20+
if (match == null) {} // expected polarity: false
21+
if (match != null) {}
22+
if (match && match[1] == "") {}
23+
24+
something({
25+
someOption: !!match // operand of `!`, so `match` is expected as a test here
26+
});
27+
28+
something({
29+
someOption: regexp.test(str)
30+
});
31+
32+
something({
33+
someOption: str.matches(regexp)
34+
});
35+
36+
something({
37+
someOption: regexp.exec(str) // not recognized as RegExpTest
38+
})
39+
}

0 commit comments

Comments
 (0)