Skip to content

Commit 5838e54

Browse files
committed
JS: Sharpen recognition of string 'match' calls
1 parent 0ebf53b commit 5838e54

File tree

5 files changed

+55
-22
lines changed

5 files changed

+55
-22
lines changed

javascript/ql/src/semmle/javascript/Regexp.qll

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -922,6 +922,36 @@ private predicate isNativeStringMethod(Function func, string name) {
922922
)
923923
}
924924

925+
/**
926+
* Holds if `name` is the name of a property on a Match object returned by `String.prototype.match`,
927+
* not including array indices.
928+
*/
929+
private predicate isMatchObjectProperty(string name) {
930+
any(ExternalInstanceMemberDecl decl).hasQualifiedName("Array", name)
931+
or
932+
name in ["length", "index", "input", "groups"]
933+
}
934+
935+
/** Holds if `call` is a call to `match` whose result is used in a way that is incompatible with Match objects. */
936+
private predicate isUsedAsNonMatchObject(DataFlow::MethodCallNode call) {
937+
call.getMethodName() = "match" and
938+
call.getNumArgument() = 1 and
939+
(
940+
// Accessing a property that is absent on Match objects
941+
exists(string propName |
942+
exists(call.getAPropertyRead(propName)) and
943+
not isMatchObjectProperty(propName) and
944+
not exists(propName.toInt())
945+
)
946+
or
947+
// Awaiting the result
948+
call.flowsToExpr(any(AwaitExpr await).getOperand())
949+
or
950+
// Result is obviously unused
951+
call.asExpr() = any(ExprStmt stmt).getExpr()
952+
)
953+
}
954+
925955
/**
926956
* Holds if `source` may be interpreted as a regular expression.
927957
*/
@@ -939,7 +969,10 @@ predicate isInterpretedAsRegExp(DataFlow::Node source) {
939969
not isNativeStringMethod(func, methodName)
940970
)
941971
|
942-
methodName = "match" and source = mce.getArgument(0) and mce.getNumArgument() = 1
972+
methodName = "match" and
973+
source = mce.getArgument(0) and
974+
mce.getNumArgument() = 1 and
975+
not isUsedAsNonMatchObject(mce)
943976
or
944977
methodName = "search" and
945978
source = mce.getArgument(0) and

javascript/ql/test/query-tests/Security/CWE-020/IncompleteHostnameRegExp.expected

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
| tst-IncompleteHostnameRegExp.js:7:3:7:30 | ^http:\\/\\/(.+).example.com\\/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example.com' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:7:2:7:31 | /^http: ... .com\\// | here |
66
| tst-IncompleteHostnameRegExp.js:10:3:10:36 | ^http:\\/\\/test.example.com\\/(?:.*) | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:10:2:10:37 | /^http: ... (?:.*)/ | here |
77
| tst-IncompleteHostnameRegExp.js:11:14:11:37 | ^http://test.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | here |
8-
| tst-IncompleteHostnameRegExp.js:12:11:12:34 | ^http://test.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | here |
8+
| tst-IncompleteHostnameRegExp.js:12:15:12:38 | ^http://test.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:12:14:12:39 | "^http: ... le.com" | here |
99
| tst-IncompleteHostnameRegExp.js:15:23:15:46 | ^http://test.example.com | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:15:13:15:50 | id(id(i ... com"))) | here |
1010
| tst-IncompleteHostnameRegExp.js:19:18:19:34 | ^test.example.com | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:20:13:20:26 | `${hostname}$` | here |
1111
| tst-IncompleteHostnameRegExp.js:22:28:22:44 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:23:13:23:27 | domain.hostname | here |

javascript/ql/test/query-tests/Security/CWE-020/MissingRegExpAnchor.expected

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
| tst-IncompleteHostnameRegExp.js:5:2:5:29 | /^http: ... le.net/ | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
44
| tst-IncompleteHostnameRegExp.js:6:2:6:43 | /^http: ... b).com/ | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
55
| tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
6-
| tst-IncompleteHostnameRegExp.js:12:10:12:35 | "^http: ... le.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
6+
| tst-IncompleteHostnameRegExp.js:12:14:12:39 | "^http: ... le.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
77
| tst-IncompleteHostnameRegExp.js:15:22:15:47 | "^http: ... le.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
88
| tst-IncompleteHostnameRegExp.js:19:17:19:35 | '^test.example.com' | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
99
| tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
@@ -51,18 +51,18 @@
5151
| tst-SemiAnchoredRegExp.js:96:2:96:55 | /^mouse ... ragend/ | Misleading operator precedence. The subexpression '^mouse' is anchored at the beginning, but the other parts of this regular expression are not |
5252
| tst-SemiAnchoredRegExp.js:97:2:97:14 | /^xxx:\|yyy:/i | Misleading operator precedence. The subexpression '^xxx:' is anchored at the beginning, but the other parts of this regular expression are not |
5353
| tst-SemiAnchoredRegExp.js:98:2:98:18 | /_xxx\|_yyy\|_zzz$/ | Misleading operator precedence. The subexpression '_zzz$' is anchored at the end, but the other parts of this regular expression are not |
54-
| tst-UnanchoredUrlRegExp.js:3:43:3:61 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
55-
| tst-UnanchoredUrlRegExp.js:4:54:4:72 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
56-
| tst-UnanchoredUrlRegExp.js:5:43:5:62 | "^https?://good.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
57-
| tst-UnanchoredUrlRegExp.js:6:43:6:64 | /^https ... od.com/ | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
58-
| tst-UnanchoredUrlRegExp.js:7:43:7:87 | "(^http ... 2.com)" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
59-
| tst-UnanchoredUrlRegExp.js:8:43:8:86 | "(https ... e.com)" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
54+
| tst-UnanchoredUrlRegExp.js:3:47:3:65 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
55+
| tst-UnanchoredUrlRegExp.js:4:58:4:76 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
56+
| tst-UnanchoredUrlRegExp.js:5:47:5:66 | "^https?://good.com" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
57+
| tst-UnanchoredUrlRegExp.js:6:47:6:68 | /^https ... od.com/ | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
58+
| tst-UnanchoredUrlRegExp.js:7:47:7:91 | "(^http ... 2.com)" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
59+
| tst-UnanchoredUrlRegExp.js:8:47:8:90 | "(https ... e.com)" | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
6060
| tst-UnanchoredUrlRegExp.js:10:2:10:22 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
6161
| tst-UnanchoredUrlRegExp.js:11:13:11:31 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
6262
| tst-UnanchoredUrlRegExp.js:13:44:13:62 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
6363
| tst-UnanchoredUrlRegExp.js:15:13:15:31 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
64-
| tst-UnanchoredUrlRegExp.js:19:43:19:61 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
65-
| tst-UnanchoredUrlRegExp.js:20:43:20:66 | "https? ... m:8080" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
64+
| tst-UnanchoredUrlRegExp.js:19:47:19:65 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
65+
| tst-UnanchoredUrlRegExp.js:20:47:20:70 | "https? ... m:8080" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
6666
| tst-UnanchoredUrlRegExp.js:23:3:23:21 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
6767
| tst-UnanchoredUrlRegExp.js:24:3:24:23 | /https? ... od.com/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
6868
| tst-UnanchoredUrlRegExp.js:25:14:25:32 | "https?://good.com" | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |

javascript/ql/test/query-tests/Security/CWE-020/tst-IncompleteHostnameRegExp.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
/^http:\/\/(?:.+)\\.test\\.example.com\//; // NOT OK
1010
/^http:\/\/test.example.com\/(?:.*)/; // OK
1111
new RegExp("^http://test.example.com"); // NOT OK
12-
s.match("^http://test.example.com"); // NOT OK
12+
if (s.match("^http://test.example.com")) {} // NOT OK
1313

1414
function id(e) { return e; }
1515
new RegExp(id(id(id("^http://test.example.com")))); // NOT OK

javascript/ql/test/query-tests/Security/CWE-020/tst-UnanchoredUrlRegExp.js

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
(function(x){
22

3-
"http://evil.com/?http://good.com".match("https?://good.com"); // NOT OK
4-
"http://evil.com/?http://good.com".match(new RegExp("https?://good.com")); // NOT OK
5-
"http://evil.com/?http://good.com".match("^https?://good.com"); // NOT OK - missing post-anchor
6-
"http://evil.com/?http://good.com".match(/^https?:\/\/good.com/); // NOT OK - missing post-anchor
7-
"http://evil.com/?http://good.com".match("(^https?://good1.com)|(^https?://good2.com)"); // NOT OK - missing post-anchor
8-
"http://evil.com/?http://good.com".match("(https?://good.com)|(^https?://goodie.com)"); // NOT OK - missing post-anchor
3+
if ("http://evil.com/?http://good.com".match("https?://good.com")) {} // NOT OK
4+
if ("http://evil.com/?http://good.com".match(new RegExp("https?://good.com"))) {} // NOT OK
5+
if ("http://evil.com/?http://good.com".match("^https?://good.com")) {} // NOT OK - missing post-anchor
6+
if ("http://evil.com/?http://good.com".match(/^https?:\/\/good.com/)) {} // NOT OK - missing post-anchor
7+
if ("http://evil.com/?http://good.com".match("(^https?://good1.com)|(^https?://good2.com)")) {} // NOT OK - missing post-anchor
8+
if ("http://evil.com/?http://good.com".match("(https?://good.com)|(^https?://goodie.com)")) {} // NOT OK - missing post-anchor
99

1010
/https?:\/\/good.com/.exec("http://evil.com/?http://good.com"); // NOT OK
1111
new RegExp("https?://good.com").exec("http://evil.com/?http://good.com"); // NOT OK
@@ -14,10 +14,10 @@
1414

1515
new RegExp("https?://good.com").test("http://evil.com/?http://good.com"); // NOT OK
1616

17-
"something".match("other"); // OK
18-
"something".match("x.commissary"); // OK
19-
"http://evil.com/?http://good.com".match("https?://good.com"); // NOT OK
20-
"http://evil.com/?http://good.com".match("https?://good.com:8080"); // NOT OK
17+
if ("something".match("other")) {} // OK
18+
if ("something".match("x.commissary")) {} // OK
19+
if ("http://evil.com/?http://good.com".match("https?://good.com")) {} // NOT OK
20+
if ("http://evil.com/?http://good.com".match("https?://good.com:8080")) {} // NOT OK
2121

2222
let trustedUrls = [
2323
"https?://good.com", // NOT OK, referenced below

0 commit comments

Comments
 (0)