Skip to content

Commit 26d5fb2

Browse files
authored
Merge pull request github#11824 from erik-krogh/secondMissAnchor
RB: add query detecting validators that use badly anchored regular expressions on library/remote input
2 parents f07c598 + 3ebac65 commit 26d5fb2

File tree

11 files changed

+225
-0
lines changed

11 files changed

+225
-0
lines changed
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/**
2+
* Provides default sources, sinks and sanitizers for reasoning about
3+
* missing full-anchored regular expressions, as well as extension
4+
* points for adding your own.
5+
*/
6+
7+
private import ruby
8+
private import codeql.ruby.dataflow.RemoteFlowSources
9+
private import codeql.ruby.frameworks.core.Gem::Gem as Gem
10+
private import codeql.ruby.Regexp as Regexp
11+
private import codeql.ruby.security.regexp.HostnameRegexp
12+
private import codeql.ruby.Concepts
13+
14+
private class RegExpTerm = Regexp::RegExpTerm;
15+
16+
/**
17+
* Provides default sources, sinks and sanitizers for detecting
18+
* missing full-anchored regular expressions, as well as extension
19+
* points for adding your own.
20+
*/
21+
module MissingFullAnchor {
22+
/** A data flow source for missing full-anchored regular expressions. */
23+
abstract class Source extends DataFlow::Node {
24+
/** Gets a description of the source. */
25+
string describe() { result = "user-provided value" }
26+
}
27+
28+
/** A data flow sink for missing full-anchored regular expressions. */
29+
abstract class Sink extends DataFlow::Node {
30+
/** Gets the node where the regexp computation happens. */
31+
abstract DataFlow::Node getCallNode();
32+
33+
/** Gets the regular expression term. */
34+
abstract RegExpTerm getRegex();
35+
}
36+
37+
/** A sanitizer for missing full-anchored regular expressions. */
38+
abstract class Sanitizer extends DataFlow::Node { }
39+
40+
private class RemoteFlowAsSource extends Source instanceof RemoteFlowSource { }
41+
42+
private class LibrayInputAsSource extends Source {
43+
LibrayInputAsSource() { this = Gem::getALibraryInput() }
44+
45+
override string describe() { result = "library input" }
46+
}
47+
48+
private RegExpTerm getABadlyAnchoredTerm() {
49+
exists(RegExpTerm left | left.getRootTerm() = result |
50+
left.(Regexp::RegExpAnchor).getChar() = "^" and
51+
isLeftArmTerm(left)
52+
) and
53+
exists(RegExpTerm right | right.getRootTerm() = result |
54+
right.(Regexp::RegExpAnchor).getChar() = "$" and
55+
isRightArmTerm(right)
56+
)
57+
}
58+
59+
private class DefaultSink extends Sink {
60+
RegexExecution exec;
61+
RegExpTerm term;
62+
63+
DefaultSink() {
64+
exec.getString() = this and
65+
term = Regexp::getTermForExecution(exec) and
66+
term = getABadlyAnchoredTerm() and
67+
// looks like a sanitizer, not just input transformation
68+
exists(Ast::ConditionalExpr ifExpr |
69+
[ifExpr.getCondition(), ifExpr.getCondition().(Ast::UnaryLogicalOperation).getOperand()] =
70+
exec.asExpr().getExpr() and
71+
ifExpr.getBranch(_).(Ast::MethodCall).getMethodName() = "raise"
72+
)
73+
}
74+
75+
override DataFlow::Node getCallNode() { result = exec }
76+
77+
override RegExpTerm getRegex() { result = term }
78+
}
79+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
/**
2+
* Provides a taint tracking configuration for reasoning about
3+
* missing full-anchored regular expressions.
4+
*
5+
* Note, for performance reasons: only import this file if
6+
* `MissingFullAnchor::Configuration` is needed, otherwise
7+
* `MissingFullAnchorCustomizations` should be imported instead.
8+
*/
9+
10+
import ruby
11+
import codeql.ruby.TaintTracking
12+
import MissingFullAnchorCustomizations::MissingFullAnchor
13+
14+
/**
15+
* A taint tracking configuration for reasoning about
16+
* missing full-anchored regular expressions.
17+
*/
18+
class Configuration extends TaintTracking::Configuration {
19+
Configuration() { this = "MissingFullAnchor" }
20+
21+
override predicate isSource(DataFlow::Node source) { source instanceof Source }
22+
23+
override predicate isSink(DataFlow::Node sink) { sink instanceof Sink }
24+
25+
override predicate isSanitizer(DataFlow::Node node) { node instanceof Sanitizer }
26+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
category: newQuery
3+
---
4+
* Added a new query, `rb/regex/badly-anchored-regexp`, to detect regular expression validators that use `^` and `$`
5+
as anchors and therefore might match only a single line of a multi-line string.
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
Regular expressions in Ruby can use anchors to match the beginning and end of a string.
9+
However, if the <code>^</code> and <code>$</code> anchors are used,
10+
the regular expression can match a single line of a multi-line string.
11+
This allows bad actors to bypass your regular expression checks and inject malicious input.
12+
</p>
13+
</overview>
14+
15+
<recommendation>
16+
<p>
17+
Use the <code>\A</code> and <code>\z</code> anchors since these anchors will always
18+
match the beginning and end of the string, even if the string contains newlines.
19+
</p>
20+
</recommendation>
21+
22+
<example>
23+
24+
<p>
25+
The following (bad) example code uses a regular expression to check that a string contains only digits.
26+
</p>
27+
28+
<sample src="examples/missing_full_anchor_bad.rb" />
29+
30+
<p>
31+
The regular expression <code>/^[0-9]+$/</code> will match a single line of a multi-line string,
32+
which may not be the intended behavior.
33+
The following (good) example code uses the regular expression <code>\A[0-9]+\z</code> to match the entire input string.
34+
</p>
35+
36+
<sample src="examples/missing_full_anchor_good.rb" />
37+
38+
</example>
39+
40+
<references>
41+
<li>
42+
Ruby documentation: <a href="https://ruby-doc.org/3.2.0/Regexp.html#class-Regexp-label-Anchors">Anchors</a>
43+
</li>
44+
</references>
45+
</qhelp>
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/**
2+
* @name Badly anchored regular expression
3+
* @description Regular expressions anchored using `^` or `$` are vulnerable to bypassing.
4+
* @kind path-problem
5+
* @problem.severity warning
6+
* @security-severity 7.8
7+
* @precision high
8+
* @id rb/regex/badly-anchored-regexp
9+
* @tags correctness
10+
* security
11+
* external/cwe/cwe-020
12+
*/
13+
14+
import codeql.ruby.security.regexp.MissingFullAnchorQuery
15+
import DataFlow::PathGraph
16+
17+
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink, Sink sinkNode
18+
where config.hasFlowPath(source, sink) and sink.getNode() = sinkNode
19+
select sink, source, sink, "This value depends on $@, and is $@ against a $@.", source.getNode(),
20+
source.getNode().(Source).describe(), sinkNode.getCallNode(), "checked", sinkNode.getRegex(),
21+
"badly anchored regular expression"
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
def bad(input)
2+
raise "Bad input" unless input =~ /^[0-9]+$/
3+
4+
# ....
5+
end
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
def good(input)
2+
raise "Bad input" unless input =~ /\A[0-9]+\z/
3+
4+
# ....
5+
end
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
edges
2+
| impl/miss-anchor.rb:2:12:2:15 | name : | impl/miss-anchor.rb:3:39:3:42 | name |
3+
| impl/miss-anchor.rb:6:12:6:15 | name : | impl/miss-anchor.rb:7:43:7:46 | name |
4+
| impl/miss-anchor.rb:14:12:14:15 | name : | impl/miss-anchor.rb:15:47:15:50 | name |
5+
nodes
6+
| impl/miss-anchor.rb:2:12:2:15 | name : | semmle.label | name : |
7+
| impl/miss-anchor.rb:3:39:3:42 | name | semmle.label | name |
8+
| impl/miss-anchor.rb:6:12:6:15 | name : | semmle.label | name : |
9+
| impl/miss-anchor.rb:7:43:7:46 | name | semmle.label | name |
10+
| impl/miss-anchor.rb:14:12:14:15 | name : | semmle.label | name : |
11+
| impl/miss-anchor.rb:15:47:15:50 | name | semmle.label | name |
12+
subpaths
13+
#select
14+
| impl/miss-anchor.rb:3:39:3:42 | name | impl/miss-anchor.rb:2:12:2:15 | name : | impl/miss-anchor.rb:3:39:3:42 | name | This value depends on $@, and is $@ against a $@. | impl/miss-anchor.rb:2:12:2:15 | name | library input | impl/miss-anchor.rb:3:39:3:89 | ... !~ ... | checked | impl/miss-anchor.rb:3:48:3:88 | ^[A-Za-z0-9\\+\\-_]+(\\/[A-Za-z0-9\\+\\-_]+)*$ | badly anchored regular expression |
15+
| impl/miss-anchor.rb:7:43:7:46 | name | impl/miss-anchor.rb:6:12:6:15 | name : | impl/miss-anchor.rb:7:43:7:46 | name | This value depends on $@, and is $@ against a $@. | impl/miss-anchor.rb:6:12:6:15 | name | library input | impl/miss-anchor.rb:7:43:7:93 | ... !~ ... | checked | impl/miss-anchor.rb:7:52:7:92 | ^[A-Za-z0-9\\+\\-_]+(\\/[A-Za-z0-9\\+\\-_]+)*$ | badly anchored regular expression |
16+
| impl/miss-anchor.rb:15:47:15:50 | name | impl/miss-anchor.rb:14:12:14:15 | name : | impl/miss-anchor.rb:15:47:15:50 | name | This value depends on $@, and is $@ against a $@. | impl/miss-anchor.rb:14:12:14:15 | name | library input | impl/miss-anchor.rb:15:47:15:97 | ... !~ ... | checked | impl/miss-anchor.rb:15:56:15:96 | ^[A-Za-z0-9\\+\\-_]+(\\/[A-Za-z0-9\\+\\-_]+)*$ | badly anchored regular expression |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
queries/security/cwe-020/MissingFullAnchor.ql
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
class Foobar
2+
def foo1(name)
3+
raise Blabity, 'Invalid thing' if name !~ /^[A-Za-z0-9\+\-_]+(\/[A-Za-z0-9\+\-_]+)*$/ # NOT OK
4+
end
5+
6+
def foo2(name)
7+
raise Blabity, 'Invalid thing' unless name !~ /^[A-Za-z0-9\+\-_]+(\/[A-Za-z0-9\+\-_]+)*$/ # NOT OK
8+
end
9+
10+
def foo3(name)
11+
raise Blabity, 'Invalid thing' unless name !~ /\A[A-Za-z0-9\+\-_]+(\/[A-Za-z0-9\+\-_]+)*\z/ # OK
12+
end
13+
14+
def foo4(name)
15+
raise Blabity, 'Invalid thing' unless not name !~ /^[A-Za-z0-9\+\-_]+(\/[A-Za-z0-9\+\-_]+)*$/ # NOT OK
16+
end
17+
end

0 commit comments

Comments
 (0)