Skip to content

Commit 6a21fa0

Browse files
authored
Merge pull request github#14034 from geoffw0/hostname
Swift: New query: Incomplete regular expression for hostnames
2 parents c32c4bb + 415d9e0 commit 6a21fa0

File tree

10 files changed

+315
-9
lines changed

10 files changed

+315
-9
lines changed

swift/ql/lib/codeql/swift/regex/Regex.qll

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,54 @@ private import codeql.swift.dataflow.DataFlow
88
private import internal.ParseRegex
99
private import internal.RegexTracking
1010

11+
/**
12+
* A data flow node whose value may flow to a position where it is interpreted
13+
* as a part of a regular expression. For example the string literal
14+
* `"(a|b).*"` in:
15+
* ```
16+
* Regex("(a|b).*").firstMatch(in: myString)
17+
* ```
18+
*/
19+
abstract class RegexPatternSource extends DataFlow::Node {
20+
/**
21+
* Gets a node where the pattern of this node is parsed as a part of
22+
* a regular expression.
23+
*/
24+
abstract DataFlow::Node getAParse();
25+
26+
/**
27+
* Gets the root term of the regular expression parsed from this pattern.
28+
*/
29+
abstract RegExpTerm getRegExpTerm();
30+
}
31+
32+
/**
33+
* For each `RegexPatternSource` data flow node, the corresponding `Expr` is
34+
* a `Regex`. This is a simple wrapper to make that happen.
35+
*/
36+
private class RegexFromRegexPatternSource extends RegExp {
37+
RegexFromRegexPatternSource() { this = any(RegexPatternSource node).asExpr() }
38+
}
39+
1140
/**
1241
* A string literal that is used as a regular expression. For example
1342
* the string literal `"(a|b).*"` in:
1443
* ```
1544
* Regex("(a|b).*").firstMatch(in: myString)
1645
* ```
1746
*/
18-
private class ParsedStringRegex extends RegExp, StringLiteralExpr {
47+
private class ParsedStringRegex extends RegexPatternSource {
48+
StringLiteralExpr expr;
1949
DataFlow::Node use;
2050

21-
ParsedStringRegex() { StringLiteralUseFlow::flow(DataFlow::exprNode(this), use) }
51+
ParsedStringRegex() {
52+
expr = this.asExpr() and
53+
StringLiteralUseFlow::flow(this, use)
54+
}
2255

23-
/**
24-
* Gets a dataflow node where this string literal is used as a regular
25-
* expression.
26-
*/
27-
DataFlow::Node getUse() { result = use }
56+
override DataFlow::Node getAParse() { result = use }
57+
58+
override RegExpTerm getRegExpTerm() { result.getRegExp() = expr }
2859
}
2960

3061
/**
@@ -246,11 +277,11 @@ abstract class RegexEval extends CallExpr {
246277
*/
247278
RegExp getARegex() {
248279
// string literal used directly as a regex
249-
result.(ParsedStringRegex).getUse().asExpr() = this.getRegexInput()
280+
DataFlow::exprNode(result).(ParsedStringRegex).getAParse().asExpr() = this.getRegexInput()
250281
or
251282
// string literal -> regex object -> use
252283
exists(RegexCreation regexCreation |
253-
result.(ParsedStringRegex).getUse() = regexCreation.getStringInput() and
284+
DataFlow::exprNode(result).(ParsedStringRegex).getAParse() = regexCreation.getStringInput() and
254285
RegexUseFlow::flow(regexCreation, DataFlow::exprNode(this.getRegexInput()))
255286
)
256287
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/**
2+
* Provides predicates for reasoning about regular expressions
3+
* that match URLs and hostname patterns.
4+
*/
5+
6+
private import swift
7+
private import codeql.swift.dataflow.DataFlow
8+
private import codeql.swift.regex.Regex as Regex
9+
private import codeql.swift.regex.RegexTreeView::RegexTreeView as TreeImpl
10+
private import codeql.regex.HostnameRegexp as Shared
11+
12+
/**
13+
* An implementation of the signature that allows the Hostname analysis to run.
14+
*/
15+
private module Impl implements Shared::HostnameRegexpSig<TreeImpl> {
16+
class DataFlowNode = DataFlow::Node;
17+
18+
class RegExpPatternSource = Regex::RegexPatternSource;
19+
}
20+
21+
import Shared::Make<TreeImpl, Impl>
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
category: newQuery
3+
---
4+
5+
* Added new query "Incomplete regular expression for hostnames" (`swift/incomplete-hostname-regexp`). This query finds regular expressions matching a URL or hostname that may match more hostnames than expected.
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
9+
Sanitizing untrusted URLs is an important technique for
10+
preventing attacks such as request forgeries and malicious
11+
redirections. Often, this is done by checking that the host of a URL
12+
is in a set of allowed hosts.
13+
14+
</p>
15+
16+
<p>
17+
18+
If a regular expression implements such a check, it is
19+
easy to accidentally make the check too permissive by not escaping the
20+
<code>.</code> meta-characters appropriately.
21+
22+
Even if the check is not used in a security-critical
23+
context, the incomplete check may still cause undesirable behaviors
24+
when it accidentally succeeds.
25+
26+
</p>
27+
</overview>
28+
29+
<recommendation>
30+
<p>
31+
32+
Escape all meta-characters appropriately when constructing
33+
regular expressions for security checks, and pay special attention to the
34+
<code>.</code> meta-character.
35+
36+
</p>
37+
</recommendation>
38+
39+
<example>
40+
41+
<p>
42+
43+
The following example code checks that a URL redirection
44+
will reach the <code>example.com</code> domain, or one of its
45+
subdomains.
46+
47+
</p>
48+
49+
<sample src="IncompleteHostnameRegexBad.swift"/>
50+
51+
<p>
52+
53+
The check is, however, easy to bypass because the unescaped
54+
<code>.</code> allows for any character before
55+
<code>example.com</code>, effectively allowing the redirect to go to
56+
an attacker-controlled domain such as <code>wwwXexample.com</code>.
57+
58+
</p>
59+
<p>
60+
61+
Address this vulnerability by escaping <code>.</code>
62+
to <code>\.</code>:
63+
64+
</p>
65+
66+
<sample src="IncompleteHostnameRegexGood.swift"/>
67+
68+
</example>
69+
70+
<references>
71+
<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">Server Side Request Forgery</a>.</li>
72+
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Unvalidated_Redirects_and_Forwards_Cheat_Sheet.html">Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
73+
</references>
74+
</qhelp>
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
/**
2+
* @name Incomplete regular expression for hostnames
3+
* @description Matching a URL or hostname against a regular expression that contains an unescaped dot as part of the hostname may match more hostnames than expected.
4+
* @kind problem
5+
* @problem.severity warning
6+
* @security-severity 7.8
7+
* @precision high
8+
* @id swift/incomplete-hostname-regexp
9+
* @tags correctness
10+
* security
11+
* external/cwe/cwe-020
12+
*/
13+
14+
private import codeql.swift.security.regex.HostnameRegex as HostnameRegex
15+
16+
query predicate problems = HostnameRegex::incompleteHostnameRegExp/4;
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
2+
func handleUrl(_ urlString: String) {
3+
// get the 'url=' parameter from the URL
4+
let components = URLComponents(string: urlString)
5+
let redirectParam = components?.queryItems?.first(where: { $0.name == "url" })
6+
7+
// check we trust the host
8+
let regex = #/^(www|beta).example.com//# // BAD
9+
if let match = redirectParam?.value?.firstMatch(of: regex) {
10+
// ... trust the URL ...
11+
}
12+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
2+
func handleUrl(_ urlString: String) {
3+
// get the 'url=' parameter from the URL
4+
let components = URLComponents(string: urlString)
5+
let redirectParam = components?.queryItems?.first(where: { $0.name == "url" })
6+
7+
// check we trust the host
8+
let regex = #/^(www|beta)\.example\.com//# // GOOD
9+
if let match = redirectParam?.value?.firstMatch(of: regex) {
10+
// ... trust the URL ...
11+
}
12+
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
| test.swift:60:17:60:40 | ^http://test.example.com/ | This regular expression has an unescaped '.' before 'example.com/', so it might match more hosts than expected. | test.swift:60:16:60:16 | ^http://test.example.com/ | here |
2+
| test.swift:63:17:63:40 | ^http://test.example.net/ | This regular expression has an unescaped '.' before 'example.net/', so it might match more hosts than expected. | test.swift:63:16:63:16 | ^http://test.example.net/ | here |
3+
| test.swift:64:17:64:54 | ^http://test.(example-a\|example-b).com/ | This regular expression has an unescaped '.' before '(example-a\|example-b).com/', so it might match more hosts than expected. | test.swift:64:16:64:16 | ^http://test.(example-a\|example-b).com/ | here |
4+
| test.swift:65:17:65:40 | ^http://(.+).example.com/ | This regular expression has an unescaped '.' before 'example.com/', so it might match more hosts than expected. | test.swift:65:16:65:16 | ^http://(.+).example.com/ | here |
5+
| test.swift:65:17:65:40 | ^http://(.+).example.com/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example.com/' to be matched anywhere in the URL, outside the hostname. | test.swift:65:16:65:16 | ^http://(.+).example.com/ | here |
6+
| test.swift:67:17:67:49 | ^http://(?:.+)\\.test\\.example.com/ | This regular expression has an unrestricted wildcard '.+' which may cause 'example.com/' to be matched anywhere in the URL, outside the hostname. | test.swift:67:16:67:16 | ^http://(?:.+)\\.test\\.example.com/ | here |
7+
| test.swift:68:17:68:46 | ^http://test.example.com/(?:.*) | This regular expression has an unescaped '.' before 'example.com/', so it might match more hosts than expected. | test.swift:68:16:68:16 | ^http://test.example.com/(?:.*) | here |
8+
| test.swift:70:17:70:63 | ^(https?:)?//((service\|www).)?example.com(?=$\|/) | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | test.swift:70:16:70:16 | ^(https?:)?//((service\|www).)?example.com(?=$\|/) | here |
9+
| test.swift:71:17:71:51 | ^(http\|https)://www.example.com/p/f/ | This regular expression has an unescaped '.' before 'example.com/p/f/', so it might match more hosts than expected. | test.swift:71:16:71:16 | ^(http\|https)://www.example.com/p/f/ | here |
10+
| test.swift:72:19:72:40 | http://sub.example.com/ | This regular expression has an unescaped '.' before 'example.com/', so it might match more hosts than expected. | test.swift:72:16:72:16 | ^(http://sub.example.com/) | here |
11+
| test.swift:73:17:73:41 | ^https?://api.example.com/ | This regular expression has an unescaped '.' before 'example.com/', so it might match more hosts than expected. | test.swift:73:16:73:16 | ^https?://api.example.com/ | here |
12+
| test.swift:75:17:75:43 | ^https://[a-z]*.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | test.swift:75:16:75:16 | ^https://[a-z]*.example.com$ | here |
13+
| test.swift:77:39:77:51 | .+.example.net | This regular expression has an unescaped '.' before 'example.net', so it might match more hosts than expected. | test.swift:77:16:77:16 | ^protos?://(localhost\|.+.example.net\|.+.example-a.com\|.+.example-b.com\|.+.example.internal) | here |
14+
| test.swift:77:54:77:68 | .+.example-a.com | This regular expression has an unescaped '.' before 'example-a.com', so it might match more hosts than expected. | test.swift:77:16:77:16 | ^protos?://(localhost\|.+.example.net\|.+.example-a.com\|.+.example-b.com\|.+.example.internal) | here |
15+
| test.swift:77:71:77:85 | .+.example-b.com | This regular expression has an unescaped '.' before 'example-b.com', so it might match more hosts than expected. | test.swift:77:16:77:16 | ^protos?://(localhost\|.+.example.net\|.+.example-a.com\|.+.example-b.com\|.+.example.internal) | here |
16+
| test.swift:81:19:81:33 | foo.example\\.com | This regular expression has an unescaped '.' before 'example\\.com', so it might match more hosts than expected. | test.swift:81:16:81:16 | ^(foo.example\\.com\|whatever)$ | here |
17+
| test.swift:83:17:83:33 | ^test.example.com$ | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | test.swift:83:16:83:16 | ^test.example.com$ | here |
18+
| test.swift:84:17:84:31 | test.example.com | This regular expression has an unescaped '.' before 'example.com', so it might match more hosts than expected. | test.swift:84:16:84:16 | test.example.com | here |
19+
| test.swift:86:26:86:41 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | test.swift:86:16:86:48 | call to id(_:) | here |
20+
| test.swift:92:21:92:36 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | test.swift:93:16:93:23 | .hostname | here |
21+
| test.swift:98:29:98:44 | test.example.com$ | This string, which is used as a regular expression $@, has an unescaped '.' before 'example.com', so it might match more hosts than expected. | test.swift:96:20:96:27 | .hostname | here |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
queries/Security/CWE-020/IncompleteHostnameRegex.ql
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
2+
// --- stubs ---
3+
4+
struct URL {
5+
init?(string: String) {}
6+
}
7+
8+
extension String {
9+
init(contentsOf: URL) {
10+
let data = ""
11+
self.init(data)
12+
}
13+
}
14+
15+
struct AnyRegexOutput {
16+
}
17+
18+
protocol RegexComponent<RegexOutput> {
19+
associatedtype RegexOutput
20+
}
21+
22+
struct Regex<Output> : RegexComponent {
23+
struct Match {
24+
}
25+
26+
init(_ pattern: String) throws where Output == AnyRegexOutput { }
27+
28+
func firstMatch(in string: String) throws -> Regex<Output>.Match? { return nil}
29+
func wholeMatch(in string: String) throws -> Regex<Output>.Match? { return nil }
30+
31+
typealias RegexOutput = Output
32+
}
33+
34+
extension String : RegexComponent {
35+
typealias Output = Substring
36+
typealias RegexOutput = String.Output
37+
}
38+
39+
// --- tests ---
40+
41+
func id(_ s : String) -> String { return s }
42+
43+
struct MyDomain {
44+
init(_ hostname: String) {
45+
self.hostname = hostname
46+
}
47+
48+
var hostname: String
49+
}
50+
51+
func testHostnames(myUrl: URL) throws {
52+
let tainted = String(contentsOf: myUrl) // tainted
53+
54+
_ = try Regex(#"^http://example\.com/"#).firstMatch(in: tainted) // GOOD
55+
_ = try Regex(#"^http://example.com/"#).firstMatch(in: tainted) // GOOD (only '.' here gives a valid top-level domain)
56+
_ = try Regex(#"^http://example.com"#).firstMatch(in: tainted) // BAD (missing anchor) [NOT DETECTED]
57+
_ = try Regex(#"^http://test\.example\.com/"#).firstMatch(in: tainted) // GOOD
58+
_ = try Regex(#"^http://test\.example.com/"#).firstMatch(in: tainted) // GOOD (only '.' here gives a valid top-level domain)
59+
_ = try Regex(#"^http://test\.example.com"#).firstMatch(in: tainted) // BAD (missing anchor) [NOT DETECTED]
60+
_ = try Regex(#"^http://test.example.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
61+
_ = try Regex(#"^http://test[.]example[.]com/"#).firstMatch(in: tainted) // GOOD (alternative method of escaping)
62+
63+
_ = try Regex(#"^http://test.example.net/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
64+
_ = try Regex(#"^http://test.(example-a|example-b).com/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
65+
_ = try Regex(#"^http://(.+).example.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname x 2)
66+
_ = try Regex(#"^http://(\.+)\.example.com/"#).firstMatch(in: tainted) // GOOD
67+
_ = try Regex(#"^http://(?:.+)\.test\.example.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
68+
_ = try Regex(#"^http://test.example.com/(?:.*)"#).firstMatch(in: tainted) // BAD (incomplete hostname)
69+
_ = try Regex(#"^(.+\.(?:example-a|example-b)\.com)/"#).firstMatch(in: tainted) // BAD (missing anchor) [NOT DETECTED]
70+
_ = try Regex(#"^(https?:)?//((service|www).)?example.com(?=$|/)"#).firstMatch(in: tainted) // BAD (incomplete hostname)
71+
_ = try Regex(#"^(http|https)://www.example.com/p/f/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
72+
_ = try Regex(#"^(http://sub.example.com/)"#).firstMatch(in: tainted) // BAD (incomplete hostname)
73+
_ = try Regex(#"^https?://api.example.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname)
74+
_ = try Regex(#"^http[s]?://?sub1\.sub2\.example\.com/f/(.+)"#).firstMatch(in: tainted) // GOOD (it has a capture group after the TLD, so should be ignored)
75+
_ = try Regex(#"^https://[a-z]*.example.com$"#).firstMatch(in: tainted) // BAD (incomplete hostname)
76+
_ = try Regex(#"^(example.dev|example.com)"#).firstMatch(in: tainted) // GOOD (any extended hostname wouldn't be included in the capture group)
77+
_ = try Regex(#"^protos?://(localhost|.+.example.net|.+.example-a.com|.+.example-b.com|.+.example.internal)"#).firstMatch(in: tainted) // BAD (incomplete hostname x3, missing anchor x 1)
78+
79+
_ = try Regex(#"^http://(..|...)\.example\.com/index\.html"#).firstMatch(in: tainted) // GOOD (wildcards are intentional)
80+
_ = try Regex(#"^http://.\.example\.com/index\.html"#).firstMatch(in: tainted) // GOOD (the wildcard is intentional)
81+
_ = try Regex(#"^(foo.example\.com|whatever)$"#).firstMatch(in: tainted) // DUBIOUS (one disjunction doesn't even look like a hostname) [DETECTED incomplete hostname]
82+
83+
_ = try Regex(#"^test.example.com$"#).firstMatch(in: tainted) // BAD (incomplete hostname)
84+
_ = try Regex(#"test.example.com"#).wholeMatch(in: tainted) // BAD (incomplete hostname, missing anchor)
85+
86+
_ = try Regex(id(id(id(#"test.example.com$"#)))).firstMatch(in: tainted) // BAD (incomplete hostname)
87+
88+
let hostname = #"test.example.com$"# // BAD (incomplete hostname) [NOT DETECTED]
89+
_ = try Regex("\(hostname)").firstMatch(in: tainted)
90+
91+
var domain = MyDomain("")
92+
domain.hostname = #"test.example.com$"# // BAD (incomplete hostname)
93+
_ = try Regex(domain.hostname).firstMatch(in: tainted)
94+
95+
func convert1(_ domain: MyDomain) throws -> Regex<AnyRegexOutput> {
96+
return try Regex(domain.hostname)
97+
}
98+
_ = try convert1(MyDomain(#"test.example.com$"#)).firstMatch(in: tainted) // BAD (incomplete hostname)
99+
100+
let domains = [ MyDomain(#"test.example.com$"#) ] // BAD (incomplete hostname) [NOT DETECTED]
101+
func convert2(_ domain: MyDomain) throws -> Regex<AnyRegexOutput> {
102+
return try Regex(domain.hostname)
103+
}
104+
_ = try domains.map({ try convert2($0).firstMatch(in: tainted) })
105+
106+
let primary = "example.com$"
107+
_ = try Regex("test." + primary).firstMatch(in: tainted) // BAD (incomplete hostname) [NOT DETECTED]
108+
_ = try Regex("test." + "example.com$").firstMatch(in: tainted) // BAD (incomplete hostname) [NOT DETECTED]
109+
_ = try Regex(#"^http://localhost:8000|" + "^https?://.+\.example\.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname) [NOT DETECTED]
110+
_ = try Regex(#"^http://localhost:8000|" + "^https?://.+.example\.com/"#).firstMatch(in: tainted) // BAD (incomplete hostname) [NOT DETECTED]
111+
112+
let harmless = #"^http://test.example.com"# // GOOD (never used as a regex)
113+
}

0 commit comments

Comments
 (0)