Skip to content

Commit f4453f4

Browse files
authored
Merge pull request github#8573 from hmac/hmac/missing-regexp-anchor
Ruby: Add MissingRegExpAnchor query
2 parents bb049bf + bbc3043 commit f4453f4

File tree

11 files changed

+467
-5
lines changed

11 files changed

+467
-5
lines changed

ruby/ql/lib/codeql/ruby/regexp/RegExpTreeView.qll

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ class RegExpParent extends TRegExpParent {
6363
/** Gets the number of child terms. */
6464
int getNumChild() { result = count(this.getAChild()) }
6565

66+
/** Gets the last child term of this element. */
67+
RegExpTerm getLastChild() { result = this.getChild(this.getNumChild() - 1) }
68+
6669
/**
6770
* Gets the name of a primary CodeQL class to which this regular
6871
* expression term belongs.
@@ -578,6 +581,15 @@ class RegExpWordBoundary extends RegExpSpecialChar {
578581
RegExpWordBoundary() { this.getChar() = "\\b" }
579582
}
580583

584+
/**
585+
* A non-word boundary, that is, a regular expression term of the form `\B`.
586+
*/
587+
class RegExpNonWordBoundary extends RegExpSpecialChar {
588+
RegExpNonWordBoundary() { this.getChar() = "\\B" }
589+
590+
override string getAPrimaryQlClass() { result = "RegExpNonWordBoundary" }
591+
}
592+
581593
/**
582594
* A character class escape in a regular expression.
583595
* That is, an escaped character that denotes multiple characters.
@@ -857,6 +869,21 @@ class RegExpDot extends RegExpSpecialChar {
857869
override string getAPrimaryQlClass() { result = "RegExpDot" }
858870
}
859871

872+
/**
873+
* A term that matches a specific position between characters in the string.
874+
*
875+
* Example:
876+
*
877+
* ```
878+
* \A
879+
* ```
880+
*/
881+
class RegExpAnchor extends RegExpSpecialChar {
882+
RegExpAnchor() { this.getChar() = ["^", "$", "\\A", "\\Z", "\\z"] }
883+
884+
override string getAPrimaryQlClass() { result = "RegExpAnchor" }
885+
}
886+
860887
/**
861888
* A dollar assertion `$` or `\Z` matching the end of a line.
862889
*
@@ -866,7 +893,7 @@ class RegExpDot extends RegExpSpecialChar {
866893
* $
867894
* ```
868895
*/
869-
class RegExpDollar extends RegExpSpecialChar {
896+
class RegExpDollar extends RegExpAnchor {
870897
RegExpDollar() { this.getChar() = ["$", "\\Z", "\\z"] }
871898

872899
override string getAPrimaryQlClass() { result = "RegExpDollar" }
@@ -881,7 +908,7 @@ class RegExpDollar extends RegExpSpecialChar {
881908
* ^
882909
* ```
883910
*/
884-
class RegExpCaret extends RegExpSpecialChar {
911+
class RegExpCaret extends RegExpAnchor {
885912
RegExpCaret() { this.getChar() = ["^", "\\A"] }
886913

887914
override string getAPrimaryQlClass() { result = "RegExpCaret" }
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: newQuery
3+
---
4+
* Added a new query, `rb/regex/missing-regexp-anchor`, which finds regular expressions which are improperly anchored. Validations using such expressions are at risk of being bypassed.
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
9+
Sanitizing untrusted input with regular expressions is a
10+
common technique. However, it is error-prone to match untrusted input
11+
against regular expressions without anchors such as <code>\A</code> or
12+
<code>\z</code>. Malicious input can bypass such security checks by
13+
embedding one of the allowed patterns in an unexpected location.
14+
15+
</p>
16+
17+
<p>
18+
19+
Even if the matching is not done in a security-critical
20+
context, it may still cause undesirable behavior when the regular
21+
expression accidentally matches.
22+
23+
</p>
24+
</overview>
25+
26+
<recommendation>
27+
<p>
28+
29+
Use anchors to ensure that regular expressions match at
30+
the expected locations.
31+
32+
</p>
33+
</recommendation>
34+
35+
<example>
36+
37+
<p>
38+
39+
The following example code checks that a URL redirection
40+
will reach the <code>example.com</code> domain, or one of its
41+
subdomains, and not some malicious site.
42+
43+
</p>
44+
45+
<sample src="examples/missing_regexp_anchor_bad.rb"/>
46+
47+
<p>
48+
49+
The check with the regular expression match is, however, easy to bypass. For example
50+
by embedding <code>http://example.com/</code> in the query
51+
string component: <code>http://evil-example.net/?x=http://example.com/</code>.
52+
53+
Address these shortcomings by using anchors in the regular expression instead:
54+
55+
</p>
56+
57+
<sample src="examples/missing_regexp_anchor_good.rb"/>
58+
59+
<p>
60+
61+
A related mistake is to write a regular expression with
62+
multiple alternatives, but to only include an anchor for one of the
63+
alternatives. As an example, the regular expression
64+
<code>/^www\.example\.com|beta\.example\.com/</code> will match the host
65+
<code>evil.beta.example.com</code> because the regular expression is parsed
66+
as <code>/(^www\.example\.com)|(beta\.example\.com)/</code>
67+
68+
</p>
69+
70+
<p>
71+
In Ruby the anchors <code>^</code> and <code>$</code> match the
72+
start and end of a line, whereas the anchors <code>\A</code> and
73+
<code>\z</code> match the start and end of the entire string.
74+
75+
Using line anchors can be dangerous, as this can allow malicious
76+
input to be hidden using newlines, leading to vulnerabilities such
77+
as HTTP header injection.
78+
79+
Unless you specifically need the line-matching behaviour of
80+
<code>^</code> and <code>$</code>, you should use <code>\A</code>
81+
and <code>\z</code> instead.
82+
</p>
83+
84+
</example>
85+
86+
<references>
87+
<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
88+
<li>OWASP: <a href="https://cheatsheetseries.owasp.org/cheatsheets/Unvalidated_Redirects_and_Forwards_Cheat_Sheet.html">XSS Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
89+
</references>
90+
</qhelp>

0 commit comments

Comments
 (0)