Skip to content

Commit 431d9d5

Browse files
authored
Merge pull request #14639 from geoffw0/anchorquery
Swift: New query for Missing Regular Expression Anchor
2 parents c26c68c + 2423998 commit 431d9d5

File tree

13 files changed

+493
-6
lines changed

13 files changed

+493
-6
lines changed

shared/regex/codeql/regex/MissingRegExpAnchor.qll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ import HostnameRegexp as HostnameShared
1515
signature module MissingRegExpAnchorSig<
1616
RegexTreeViewSig TreeImpl, HostnameShared::HostnameRegexpSig<TreeImpl> Specific>
1717
{
18+
/**
19+
* Holds if this regular expression is used in a 'replacement' operation, such
20+
* as replacing all matches of the regular expression in the input string
21+
* with another string.
22+
*/
1823
predicate isUsedAsReplace(Specific::RegExpPatternSource pattern);
1924

2025
/** Gets a string representation of an end anchor from a regular expression. */

swift/ql/lib/codeql/swift/regex/Regex.qll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,13 @@ abstract class RegexEval extends CallExpr {
330330
*/
331331
DataFlow::Node getAnOptionsInput() { none() }
332332

333+
/**
334+
* Holds if this regular expression evaluation is a 'replacement' operation,
335+
* such as replacing all matches of the regular expression in the input
336+
* string with another string.
337+
*/
338+
abstract predicate isUsedAsReplace();
339+
333340
/**
334341
* Gets a regular expression value that is evaluated here (if any can be identified).
335342
*/
@@ -416,6 +423,10 @@ private class AlwaysRegexEval extends RegexEval {
416423
override DataFlow::Node getRegexInputNode() { result = regexInput }
417424

418425
override DataFlow::Node getStringInputNode() { result = stringInput }
426+
427+
override predicate isUsedAsReplace() {
428+
this.getStaticTarget().getName().matches(["replac%", "stringByReplac%", "trim%"])
429+
}
419430
}
420431

421432
/**
@@ -508,4 +519,6 @@ private class NSStringCompareOptionsRegexEval extends RegexEval instanceof NSStr
508519
override DataFlow::Node getAnOptionsInput() {
509520
result = this.(NSStringCompareOptionsPotentialRegexEval).getAnOptionsInput()
510521
}
522+
523+
override predicate isUsedAsReplace() { this.getStaticTarget().getName().matches("replac%") }
511524
}

swift/ql/lib/codeql/swift/security/regex/HostnameRegex.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ private import codeql.regex.HostnameRegexp as Shared
1212
/**
1313
* An implementation of the signature that allows the Hostname analysis to run.
1414
*/
15-
private module Impl implements Shared::HostnameRegexpSig<TreeImpl> {
15+
module Impl implements Shared::HostnameRegexpSig<TreeImpl> {
1616
class DataFlowNode = DataFlow::Node;
1717

1818
class RegExpPatternSource = Regex::RegexPatternSource;
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
category: newQuery
3+
---
4+
5+
* Added a new query "Missing regular expression anchor" (`swift/missing-regexp-anchor`) for Swift. This query detects regular expressions without anchors that can be vulnerable to bypassing.
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
9+
Sanitizing untrusted input with regular expressions is a
10+
common technique, but malicious actors may be able to embed one of the
11+
allowed patterns in an unexpected location. To prevent this,
12+
you should use anchors in your regular expressions,
13+
such as <code>^</code> or <code>$</code>.
14+
15+
</p>
16+
17+
<p>
18+
19+
Even if the matching is not done in a security-critical
20+
context, it may still cause undesirable behavior when the regular
21+
expression accidentally matches.
22+
23+
</p>
24+
</overview>
25+
26+
<recommendation>
27+
<p>
28+
29+
Use anchors to ensure that regular expressions match at
30+
the expected locations.
31+
32+
</p>
33+
</recommendation>
34+
35+
<example>
36+
37+
<p>
38+
39+
The following example code attempts to check that a URL redirection
40+
will reach the <code>example.com</code> domain, and not
41+
a malicious site:
42+
43+
</p>
44+
45+
<sample src="MissingRegexAnchorBad.swift"/>
46+
47+
<p>
48+
49+
However, this regular expression check can be easily bypassed,
50+
and a malicious actor could embed
51+
<code>http://www.example.com/</code> in the query
52+
string component of a malicious site. For example,
53+
<code>http://evil-example.net/?x=http://www.example.com/</code>.
54+
Instead, you should use anchors in the regular expression check:
55+
56+
</p>
57+
58+
<sample src="MissingRegexAnchorGood.swift"/>
59+
60+
<p>
61+
62+
If you need to write a regular expression to match
63+
multiple hosts, you should include an anchor for all of the
64+
alternatives. For example, the regular expression
65+
<code>/^www\.example\.com|beta\.example\.com/</code> will match the host
66+
<code>evil.beta.example.com</code>, because the regular expression is parsed
67+
as <code>/(^www\.example\.com)|(beta\.example\.com)/</code>.
68+
69+
</p>
70+
</example>
71+
72+
<references>
73+
<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
74+
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Unvalidated_Redirects_and_Forwards_Cheat_Sheet.html">Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
75+
</references>
76+
</qhelp>
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/**
2+
* @name Missing regular expression anchor
3+
* @description Regular expressions without anchors can be vulnerable to bypassing.
4+
* @kind problem
5+
* @problem.severity warning
6+
* @security-severity 7.8
7+
* @precision high
8+
* @id swift/missing-regexp-anchor
9+
* @tags correctness
10+
* security
11+
* external/cwe/cwe-020
12+
*/
13+
14+
private import swift
15+
private import codeql.swift.dataflow.DataFlow
16+
private import codeql.swift.regex.Regex
17+
private import codeql.swift.regex.RegexTreeView::RegexTreeView as TreeImpl
18+
private import codeql.swift.security.regex.HostnameRegex as HostnameRegex
19+
private import codeql.regex.MissingRegExpAnchor as MissingRegExpAnchor
20+
21+
private module Impl implements
22+
MissingRegExpAnchor::MissingRegExpAnchorSig<TreeImpl, HostnameRegex::Impl>
23+
{
24+
predicate isUsedAsReplace(RegexPatternSource pattern) {
25+
exists(RegexEval eval |
26+
eval.getARegex() = pattern.asExpr() and
27+
eval.isUsedAsReplace()
28+
)
29+
}
30+
31+
string getEndAnchorText() { result = "$" }
32+
}
33+
34+
import MissingRegExpAnchor::Make<TreeImpl, HostnameRegex::Impl, Impl>
35+
36+
from DataFlow::Node node, string msg
37+
where
38+
isUnanchoredHostnameRegExp(node, msg)
39+
or
40+
isSemiAnchoredHostnameRegExp(node, msg)
41+
or
42+
hasMisleadingAnchorPrecedence(node, msg)
43+
or
44+
isLineAnchoredHostnameRegExp(node, msg)
45+
select node, msg
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
func handleUrl(_ urlString: String) {
2+
// get the 'url=' parameter from the URL
3+
let components = URLComponents(string: urlString)
4+
let redirectParam = components?.queryItems?.first(where: { $0.name == "url" })
5+
6+
// check we trust the host
7+
let regex = try Regex(#"https?://www\.example\.com"#) // BAD: the host of `url` may be controlled by an attacker
8+
if let match = redirectParam?.value?.firstMatch(of: regex) {
9+
// ... trust the URL ...
10+
}
11+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
func handleUrl(_ urlString: String) {
2+
// get the 'url=' parameter from the URL
3+
let components = URLComponents(string: urlString)
4+
let redirectParam = components?.queryItems?.first(where: { $0.name == "url" })
5+
6+
// check we trust the host
7+
let regex = try Regex(#"^https?://www\.example\.com/"#) // GOOD: the host of `url` cannot be controlled by an attacker
8+
if let match = redirectParam?.value?.firstMatch(of: regex) {
9+
// ... trust the URL ...
10+
}
11+
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
| SemiAnchoredRegex.swift:50:16:50:16 | ^a\|b | Misleading operator precedence. The subexpression '^a' is anchored at the beginning, but the other parts of this regular expression are not |
2+
| SemiAnchoredRegex.swift:53:16:53:16 | ^a\|b\|c | Misleading operator precedence. The subexpression '^a' is anchored at the beginning, but the other parts of this regular expression are not |
3+
| SemiAnchoredRegex.swift:59:16:59:16 | ^a\|(b) | Misleading operator precedence. The subexpression '^a' is anchored at the beginning, but the other parts of this regular expression are not |
4+
| SemiAnchoredRegex.swift:61:16:61:16 | ^(a)\|(b) | Misleading operator precedence. The subexpression '^(a)' is anchored at the beginning, but the other parts of this regular expression are not |
5+
| SemiAnchoredRegex.swift:63:16:63:16 | a\|b$ | Misleading operator precedence. The subexpression 'b$' is anchored at the end, but the other parts of this regular expression are not |
6+
| SemiAnchoredRegex.swift:66:16:66:16 | a\|b\|c$ | Misleading operator precedence. The subexpression 'c$' is anchored at the end, but the other parts of this regular expression are not |
7+
| SemiAnchoredRegex.swift:72:16:72:16 | (a)\|b$ | Misleading operator precedence. The subexpression 'b$' is anchored at the end, but the other parts of this regular expression are not |
8+
| SemiAnchoredRegex.swift:74:16:74:16 | (a)\|(b)$ | Misleading operator precedence. The subexpression '(b)$' is anchored at the end, but the other parts of this regular expression are not |
9+
| SemiAnchoredRegex.swift:76:16:76:16 | ^good.com\|better.com | Misleading operator precedence. The subexpression '^good.com' is anchored at the beginning, but the other parts of this regular expression are not |
10+
| SemiAnchoredRegex.swift:76:16:76:16 | ^good.com\|better.com | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
11+
| SemiAnchoredRegex.swift:77:16:77:16 | ^good\\.com\|better\\.com | Misleading operator precedence. The subexpression '^good\\.com' is anchored at the beginning, but the other parts of this regular expression are not |
12+
| SemiAnchoredRegex.swift:77:16:77:16 | ^good\\.com\|better\\.com | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
13+
| SemiAnchoredRegex.swift:78:16:78:16 | ^good\\\\.com\|better\\\\.com | Misleading operator precedence. The subexpression '^good\\\\.com' is anchored at the beginning, but the other parts of this regular expression are not |
14+
| SemiAnchoredRegex.swift:79:16:79:16 | ^good\\\\\\.com\|better\\\\\\.com | Misleading operator precedence. The subexpression '^good\\\\\\.com' is anchored at the beginning, but the other parts of this regular expression are not |
15+
| SemiAnchoredRegex.swift:80:16:80:16 | ^good\\\\\\\\.com\|better\\\\\\\\.com | Misleading operator precedence. The subexpression '^good\\\\\\\\.com' is anchored at the beginning, but the other parts of this regular expression are not |
16+
| SemiAnchoredRegex.swift:82:16:82:16 | ^foo\|bar\|baz$ | Misleading operator precedence. The subexpression '^foo' is anchored at the beginning, but the other parts of this regular expression are not |
17+
| SemiAnchoredRegex.swift:82:16:82:16 | ^foo\|bar\|baz$ | Misleading operator precedence. The subexpression 'baz$' is anchored at the end, but the other parts of this regular expression are not |
18+
| SemiAnchoredRegex.swift:89:16:89:16 | (\\.xxx)\|(\\.yyy)\|(\\.zzz)$ | Misleading operator precedence. The subexpression '(\\.zzz)$' is anchored at the end, but the other parts of this regular expression are not |
19+
| SemiAnchoredRegex.swift:90:16:90:16 | (^left\|right\|center)\\sbottom$ | Misleading operator precedence. The subexpression '^left' is anchored at the beginning, but the other parts of this regular expression are not |
20+
| SemiAnchoredRegex.swift:91:16:91:16 | \\.xxx\|\\.yyy\|\\.zzz$ | Misleading operator precedence. The subexpression '\\.zzz$' is anchored at the end, but the other parts of this regular expression are not |
21+
| SemiAnchoredRegex.swift:92:16:92:16 | \\.xxx\|\\.yyy\|\\.zzz$ | Misleading operator precedence. The subexpression '\\.zzz$' is anchored at the end, but the other parts of this regular expression are not |
22+
| SemiAnchoredRegex.swift:93:16:93:16 | \\.xxx\|\\.yyy\|zzz$ | Misleading operator precedence. The subexpression 'zzz$' is anchored at the end, but the other parts of this regular expression are not |
23+
| SemiAnchoredRegex.swift:94:16:94:16 | ^([A-Z]\|xxx[XY]$) | Misleading operator precedence. The subexpression 'xxx[XY]$' is anchored at the end, but the other parts of this regular expression are not |
24+
| SemiAnchoredRegex.swift:95:16:95:16 | ^(xxx yyy zzz)\|(xxx yyy) | Misleading operator precedence. The subexpression '^(xxx yyy zzz)' is anchored at the beginning, but the other parts of this regular expression are not |
25+
| SemiAnchoredRegex.swift:96:16:96:16 | ^(xxx yyy zzz)\|(xxx yyy)\|(1st( xxx)? yyy)\|xxx\|1st | Misleading operator precedence. The subexpression '^(xxx yyy zzz)' is anchored at the beginning, but the other parts of this regular expression are not |
26+
| SemiAnchoredRegex.swift:97:16:97:16 | ^(xxx:)\|(yyy:)\|(zzz:) | Misleading operator precedence. The subexpression '^(xxx:)' is anchored at the beginning, but the other parts of this regular expression are not |
27+
| SemiAnchoredRegex.swift:98:16:98:16 | ^(xxx?:)\|(yyy:zzz\\/) | Misleading operator precedence. The subexpression '^(xxx?:)' is anchored at the beginning, but the other parts of this regular expression are not |
28+
| SemiAnchoredRegex.swift:99:16:99:16 | ^@media\|@page | Misleading operator precedence. The subexpression '^@media' is anchored at the beginning, but the other parts of this regular expression are not |
29+
| SemiAnchoredRegex.swift:100:16:100:16 | ^\\s*(xxx?\|yyy\|zzz):\|xxx:yyy | Misleading operator precedence. The subexpression '^\\s*(xxx?\|yyy\|zzz):' is anchored at the beginning, but the other parts of this regular expression are not |
30+
| SemiAnchoredRegex.swift:101:16:101:16 | ^click\|mouse\|touch | Misleading operator precedence. The subexpression '^click' is anchored at the beginning, but the other parts of this regular expression are not |
31+
| SemiAnchoredRegex.swift:102:16:102:16 | ^http://good\\.com\|http://better\\.com | Misleading operator precedence. The subexpression '^http://good\\.com' is anchored at the beginning, but the other parts of this regular expression are not |
32+
| SemiAnchoredRegex.swift:102:16:102:16 | ^http://good\\.com\|http://better\\.com | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
33+
| SemiAnchoredRegex.swift:103:16:103:16 | ^https?://good\\.com\|https?://better\\.com | Misleading operator precedence. The subexpression '^https?://good\\.com' is anchored at the beginning, but the other parts of this regular expression are not |
34+
| SemiAnchoredRegex.swift:103:16:103:16 | ^https?://good\\.com\|https?://better\\.com | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
35+
| SemiAnchoredRegex.swift:104:16:104:16 | ^mouse\|touch\|click\|contextmenu\|drop\|dragover\|dragend | Misleading operator precedence. The subexpression '^mouse' is anchored at the beginning, but the other parts of this regular expression are not |
36+
| SemiAnchoredRegex.swift:105:16:105:16 | ^xxx:\|yyy: | Misleading operator precedence. The subexpression '^xxx:' is anchored at the beginning, but the other parts of this regular expression are not |
37+
| SemiAnchoredRegex.swift:106:16:106:16 | _xxx\|_yyy\|_zzz$ | Misleading operator precedence. The subexpression '_zzz$' is anchored at the end, but the other parts of this regular expression are not |
38+
| UnanchoredUrlRegex.swift:62:39:62:39 | https?://good.com | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
39+
| UnanchoredUrlRegex.swift:63:39:63:39 | https?://good.com | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
40+
| UnanchoredUrlRegex.swift:64:39:64:39 | ^https?://good.com | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
41+
| UnanchoredUrlRegex.swift:65:39:65:39 | (^https?://good1.com)\|(^https?://good2.com) | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
42+
| UnanchoredUrlRegex.swift:66:39:66:39 | (https?://good.com)\|(^https?://goodie.com) | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
43+
| UnanchoredUrlRegex.swift:66:39:66:39 | (https?://good.com)\|(^https?://goodie.com) | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
44+
| UnanchoredUrlRegex.swift:68:39:68:39 | https?:\\/\\/good.com | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
45+
| UnanchoredUrlRegex.swift:69:39:69:39 | https?://good.com | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
46+
| UnanchoredUrlRegex.swift:71:46:71:46 | https?://good.com | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
47+
| UnanchoredUrlRegex.swift:78:39:78:39 | https?://good.com | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
48+
| UnanchoredUrlRegex.swift:79:39:79:39 | https?://good.com:8080 | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
49+
| UnanchoredUrlRegex.swift:82:3:82:3 | https?://good.com | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
50+
| UnanchoredUrlRegex.swift:83:3:83:3 | https?:\\/\\/good.com | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
51+
| UnanchoredUrlRegex.swift:84:3:84:3 | ^https?://good.com | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
52+
| UnanchoredUrlRegex.swift:95:39:95:39 | https?:\\/\\/good.com\\/([0-9]+) | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
53+
| UnanchoredUrlRegex.swift:101:39:101:39 | example\\.com\|whatever | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
54+
| test.swift:56:16:56:16 | ^http://example.com | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
55+
| test.swift:59:16:59:16 | ^http://test\\.example.com | This hostname pattern may match any domain name, as it is missing a '$' or '/' at the end. |
56+
| test.swift:69:16:69:16 | ^(.+\\.(?:example-a\|example-b)\\.com)/ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
57+
| test.swift:76:16:76:16 | ^(example.dev\|example.com) | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
58+
| test.swift:77:16:77:16 | ^protos?://(localhost\|.+.example.net\|.+.example-a.com\|.+.example-b.com\|.+.example.internal) | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
59+
| test.swift:81:16:81:16 | ^(foo.example\\.com\|whatever)$ | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
60+
| test.swift:84:16:84:16 | test.example.com | When this is used as a regular expression on a URL, it may match anywhere, and arbitrary hosts may come before or after it. |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
queries/Security/CWE-020/MissingRegexAnchor.ql

0 commit comments

Comments
 (0)