Skip to content

Commit 6fb1058

Browse files
committed
Swift: Copy IncompleteHostnameRegex query from JS.
1 parent 700f383 commit 6fb1058

File tree

8 files changed

+211
-0
lines changed

8 files changed

+211
-0
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
/**
2+
* Provides predicates for reasoning about regular expressions
3+
* that match URLs and hostname patterns.
4+
*/
5+
6+
deprecated import semmle.javascript.security.regexp.HostnameRegexp as Dep
7+
import Dep
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
/**
2+
* Provides predicates for reasoning about regular expressions
3+
* that match URLs and hostname patterns.
4+
*/
5+
6+
private import javascript as JS
7+
private import semmle.javascript.security.regexp.RegExpTreeView::RegExpTreeView as TreeImpl
8+
private import semmle.javascript.Regexp as RegExp
9+
private import codeql.regex.HostnameRegexp as Shared
10+
11+
/** An implementation of the signature that allows the Hostname analysis to run. */
12+
module Impl implements Shared::HostnameRegexpSig<TreeImpl> {
13+
class DataFlowNode = JS::DataFlow::Node;
14+
15+
class RegExpPatternSource = RegExp::RegExpPatternSource;
16+
}
17+
18+
import Shared::Make<TreeImpl, Impl>
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
app.get('/some/path', function(req, res) {
2+
let url = req.param('url'),
3+
host = urlLib.parse(url).host;
4+
// BAD: the host of `url` may be controlled by an attacker
5+
let regex = /^((www|beta).)?example.com/;
6+
if (host.match(regex)) {
7+
res.redirect(url);
8+
}
9+
});
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
9+
Sanitizing untrusted URLs is an important technique for
10+
preventing attacks such as request forgeries and malicious
11+
redirections. Often, this is done by checking that the host of a URL
12+
is in a set of allowed hosts.
13+
14+
</p>
15+
16+
<p>
17+
18+
If a regular expression implements such a check, it is
19+
easy to accidentally make the check too permissive by not escaping the
20+
<code>.</code> meta-characters appropriately.
21+
22+
Even if the check is not used in a security-critical
23+
context, the incomplete check may still cause undesirable behaviors
24+
when it accidentally succeeds.
25+
26+
</p>
27+
</overview>
28+
29+
<recommendation>
30+
<p>
31+
32+
Escape all meta-characters appropriately when constructing
33+
regular expressions for security checks, and pay special attention to the
34+
<code>.</code> meta-character.
35+
36+
</p>
37+
</recommendation>
38+
39+
<example>
40+
41+
<p>
42+
43+
The following example code checks that a URL redirection
44+
will reach the <code>example.com</code> domain, or one of its
45+
subdomains.
46+
47+
</p>
48+
49+
<sample src="examples/IncompleteHostnameRegExp.js"/>
50+
51+
<p>
52+
53+
The check is however easy to bypass because the unescaped
54+
<code>.</code> allows for any character before
55+
<code>example.com</code>, effectively allowing the redirect to go to
56+
an attacker-controlled domain such as <code>wwwXexample.com</code>.
57+
58+
</p>
59+
<p>
60+
61+
Address this vulnerability by escaping <code>.</code>
62+
appropriately: <code>let regex = /^((www|beta)\.)?example\.com/</code>.
63+
64+
</p>
65+
66+
</example>
67+
68+
<references>
69+
<li>MDN: <a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions">Regular Expressions</a></li>
70+
<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
71+
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Unvalidated_Redirects_and_Forwards_Cheat_Sheet.html">Unvalidated Redirects and Forwards Cheat Sheet</a></li>
72+
</references>
73+
</qhelp>
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
/**
2+
* @name Incomplete regular expression for hostnames
3+
* @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname might match more hostnames than expected.
4+
* @kind problem
5+
* @problem.severity warning
6+
* @security-severity 7.8
7+
* @precision high
8+
* @id js/incomplete-hostname-regexp
9+
* @tags correctness
10+
* security
11+
* external/cwe/cwe-020
12+
*/
13+
14+
private import semmle.javascript.security.regexp.HostnameRegexp as HostnameRegexp
15+
16+
query predicate problems = HostnameRegexp::incompleteHostnameRegExp/4;
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
| tst-IncompleteHostnameRegExp.js:3:3:3:28 | ^http:\\/\\/test.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:3:2:3:29 | /^http: ... le.com/ | here |
2+
| tst-IncompleteHostnameRegExp.js:5:3:5:28 | ^http:\\/\\/test.example.net | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:5:2:5:29 | /^http: ... le.net/ | here |
3+
| tst-IncompleteHostnameRegExp.js:6:3:6:42 | ^http:\\/\\/test.(example-a\|example-b).com | This regular expression has an unescaped '.' before '(example-a\|example-b).com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:6:2:6:43 | /^http: ... b).com/ | here |
4+
| tst-IncompleteHostnameRegExp.js:7:3:7:30 | ^http:\\/\\/(.+).example.com\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:7:2:7:31 | /^http: ... .com\\// | here |
5+
| tst-IncompleteHostnameRegExp.js:7:3:7:30 | ^http:\\/\\/(.+).example.com\\/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example.com' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:7:2:7:31 | /^http: ... .com\\// | here |
6+
| tst-IncompleteHostnameRegExp.js:10:3:10:36 | ^http:\\/\\/test.example.com\\/(?:.*) | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:10:2:10:37 | /^http: ... (?:.*)/ | here |
7+
| tst-IncompleteHostnameRegExp.js:11:14:11:37 | ^http://test.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:11:13:11:38 | "^http: ... le.com" | here |
8+
| tst-IncompleteHostnameRegExp.js:12:15:12:38 | ^http://test.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:12:14:12:39 | "^http: ... le.com" | here |
9+
| tst-IncompleteHostnameRegExp.js:15:23:15:46 | ^http://test.example.com | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:15:13:15:50 | id(id(i ... com"))) | here |
10+
| tst-IncompleteHostnameRegExp.js:19:18:19:34 | ^test.example.com | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:20:13:20:26 | `${hostname}$` | here |
11+
| tst-IncompleteHostnameRegExp.js:22:28:22:44 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:23:13:23:27 | domain.hostname | here |
12+
| tst-IncompleteHostnameRegExp.js:28:24:28:40 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:26:21:26:35 | domain.hostname | here |
13+
| tst-IncompleteHostnameRegExp.js:30:31:30:47 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:32:21:32:35 | domain.hostname | here |
14+
| tst-IncompleteHostnameRegExp.js:37:3:37:53 | ^(https?:)?\\/\\/((service\|www).)?example.com(?=$\|\\/) | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | here |
15+
| tst-IncompleteHostnameRegExp.js:38:3:38:43 | ^(http\|https):\\/\\/www.example.com\\/p\\/f\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | here |
16+
| tst-IncompleteHostnameRegExp.js:39:5:39:30 | http:\\/\\/sub.example.com\\/ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:39:2:39:33 | /^(http ... om\\/)/g | here |
17+
| tst-IncompleteHostnameRegExp.js:40:3:40:29 | ^https?:\\/\\/api.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:40:2:40:30 | /^https ... le.com/ | here |
18+
| tst-IncompleteHostnameRegExp.js:41:42:41:48 | ^https?://.+\\.example\\.com/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:41:13:41:71 | '^http: ... \\.com/' | here |
19+
| tst-IncompleteHostnameRegExp.js:43:3:43:32 | ^https:\\/\\/[a-z]*.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:43:2:43:33 | /^https ... e.com$/ | here |
20+
| tst-IncompleteHostnameRegExp.js:44:32:44:45 | .+.example.net | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
21+
| tst-IncompleteHostnameRegExp.js:44:47:44:62 | .+.example-a.com | This regular expression has an unescaped '.' before 'example-a.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
22+
| tst-IncompleteHostnameRegExp.js:44:64:44:79 | .+.example-b.com | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:44:9:44:101 | '^proto ... ernal)' | here |
23+
| tst-IncompleteHostnameRegExp.js:48:42:48:47 | ^https?://.+.example\\.com/ | This regular expression has an unescaped '.' before 'example\\.com/', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
24+
| tst-IncompleteHostnameRegExp.js:48:42:48:47 | ^https?://.+.example\\.com/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example\\.com/' to be matched anywhere in the URL, outside the hostname. | tst-IncompleteHostnameRegExp.js:48:13:48:69 | '^http: ... \\.com/' | here |
25+
| tst-IncompleteHostnameRegExp.js:53:14:53:35 | test.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:53:13:53:36 | 'test.' ... e.com$' | here |
26+
| tst-IncompleteHostnameRegExp.js:55:14:55:38 | ^http://test.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:55:13:55:39 | '^http: ... le.com' | here |
27+
| tst-IncompleteHostnameRegExp.js:59:5:59:20 | foo.example\\.com | This regular expression has an unescaped '.' before 'example\\.com', so it might match more hosts than expected. | tst-IncompleteHostnameRegExp.js:59:2:59:32 | /^(foo. ... ever)$/ | here |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Security/CWE-020/IncompleteHostnameRegExp.ql
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
(function() {
2+
/^http:\/\/example.com/; // OK
3+
/^http:\/\/test.example.com/; // NOT OK
4+
/^http:\/\/test\\.example.com/; // OK
5+
/^http:\/\/test.example.net/; // NOT OK
6+
/^http:\/\/test.(example-a|example-b).com/; // NOT OK
7+
/^http:\/\/(.+).example.com\//; // NOT OK
8+
/^http:\/\/(\\.+)\\.example.com/; // OK
9+
/^http:\/\/(?:.+)\\.test\\.example.com\//; // NOT OK
10+
/^http:\/\/test.example.com\/(?:.*)/; // NOT OK
11+
new RegExp("^http://test.example.com"); // NOT OK
12+
if (s.match("^http://test.example.com")) {} // NOT OK
13+
14+
function id(e) { return e; }
15+
new RegExp(id(id(id("^http://test.example.com")))); // NOT OK
16+
17+
new RegExp(`test.example.com$`); // NOT OK
18+
19+
let hostname = '^test.example.com'; // NOT OK
20+
new RegExp(`${hostname}$`);
21+
22+
let domain = { hostname: 'test.example.com$' }; // NOT OK
23+
new RegExp(domain.hostname);
24+
25+
function convert1(domain) {
26+
return new RegExp(domain.hostname);
27+
}
28+
convert1({ hostname: 'test.example.com$' }); // NOT OK
29+
30+
let domains = [ { hostname: 'test.example.com$' } ]; // NOT OK
31+
function convert2(domain) {
32+
return new RegExp(domain.hostname);
33+
}
34+
domains.map(d => convert2(d));
35+
36+
/^(.+\.(?:example-a|example-b)\.com)\//; // NOT OK
37+
/^(https?:)?\/\/((service|www).)?example.com(?=$|\/)/; // NOT OK
38+
/^(http|https):\/\/www.example.com\/p\/f\//; // NOT OK
39+
/^(http:\/\/sub.example.com\/)/g; // NOT OK
40+
/^https?:\/\/api.example.com/; // NOT OK
41+
new RegExp('^http://localhost:8000|' + '^https?://.+\\.example\\.com/'); // NOT OK
42+
new RegExp('^http[s]?:\/\/?sub1\\.sub2\\.example\\.com\/f\/(.+)'); // NOT OK
43+
/^https:\/\/[a-z]*.example.com$/; // NOT OK
44+
RegExp('^protos?://(localhost|.+.example.net|.+.example-a.com|.+.example-b.com|.+.example.internal)'); // NOT OK
45+
46+
/^(example.dev|example.com)/; // OK
47+
48+
new RegExp('^http://localhost:8000|' + '^https?://.+.example\\.com/'); // NOT OK
49+
50+
var primary = 'example.com$';
51+
new RegExp('test.' + primary); // NOT OK, but not detected
52+
53+
new RegExp('test.' + 'example.com$'); // NOT OK
54+
55+
new RegExp('^http://test\.example.com'); // NOT OK
56+
57+
/^http:\/\/(..|...)\.example\.com\/index\.html/; // OK, wildcards are intentional
58+
/^http:\/\/.\.example\.com\/index\.html/; // OK, the wildcard is intentional
59+
/^(foo.example\.com|whatever)$/; // kinda OK - one disjunction doesn't even look like a hostname
60+
});

0 commit comments

Comments
 (0)