Skip to content

Commit 649d7dd

Browse files
authored
Merge pull request github#8607 from github/nickrolfe/incomplete_sanitization
Ruby: port of `js/incomplete-sanitization`
2 parents eacfceb + 9b2a983 commit 649d7dd

File tree

14 files changed

+774
-1
lines changed

14 files changed

+774
-1
lines changed

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,20 @@ class Node extends TNode {
4040
}
4141

4242
/**
43-
* Gets a local source node from which data may flow to this node in zero or more local data-flow steps.
43+
* Gets a local source node from which data may flow to this node in zero or
44+
* more local data-flow steps.
4445
*/
4546
LocalSourceNode getALocalSource() { result.flowsTo(this) }
47+
48+
/**
49+
* Gets a data flow node from which data may flow to this node in one local step.
50+
*/
51+
Node getAPredecessor() { localFlowStep(result, this) }
52+
53+
/**
54+
* Gets a data flow node to which data may flow from this node in one local step.
55+
*/
56+
Node getASuccessor() { localFlowStep(this, result) }
4657
}
4758

4859
/** A data-flow node corresponding to a call in the control-flow graph. */

ruby/ql/lib/codeql/ruby/frameworks/core/String.qll

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,81 @@ private import codeql.ruby.ApiGraphs
55
private import codeql.ruby.DataFlow
66
private import codeql.ruby.dataflow.FlowSummary
77
private import codeql.ruby.dataflow.internal.DataFlowDispatch
8+
private import codeql.ruby.controlflow.CfgNodes
9+
private import codeql.ruby.Regexp as RE
10+
11+
/**
12+
* A call to a string substitution method, i.e. `String#sub`, `String#sub!`,
13+
* `String#gsub`, or `String#gsub!`.
14+
*
15+
* We heuristically include any call to a method matching the above names,
16+
* provided it has exactly two arguments and a receiver.
17+
*/
18+
class StringSubstitutionCall extends DataFlow::CallNode {
19+
StringSubstitutionCall() {
20+
this.getMethodName() = ["sub", "sub!", "gsub", "gsub!"] and
21+
exists(this.getReceiver()) and
22+
this.getNumberOfArguments() = 2
23+
}
24+
25+
/**
26+
* Holds if this is a global substitution, i.e. this is a call to `gsub` or
27+
* `gsub!`.
28+
*/
29+
predicate isGlobal() { this.getMethodName() = ["gsub", "gsub!"] }
30+
31+
/**
32+
* Holds if this is a destructive substitution, i.e. this is a call to `sub!`
33+
* or `gsub!`.
34+
*/
35+
predicate isDestructive() { this.getMethodName() = ["sub!", "gsub!"] }
36+
37+
/** Gets the first argument to this call. */
38+
DataFlow::Node getPatternArgument() { result = this.getArgument(0) }
39+
40+
/** Gets the second argument to this call. */
41+
DataFlow::Node getReplacementArgument() { result = this.getArgument(1) }
42+
43+
/**
44+
* Gets the regular expression passed as the first (pattern) argument in this
45+
* call, if any.
46+
*/
47+
RE::RegExpPatternSource getPatternRegExp() {
48+
// TODO: using local flow means we miss regexps defined as constants outside
49+
// of the function scope.
50+
result.(DataFlow::LocalSourceNode).flowsTo(this.getPatternArgument())
51+
}
52+
53+
/**
54+
* Gets the string value passed as the first (pattern) argument in this call,
55+
* if any.
56+
*/
57+
string getPatternString() {
58+
result = this.getPatternArgument().asExpr().getConstantValue().getString()
59+
}
60+
61+
/**
62+
* Gets the string value passed as the second (replacement) argument in this
63+
* call, if any.
64+
*/
65+
string getReplacementString() {
66+
result = this.getReplacementArgument().asExpr().getConstantValue().getString()
67+
}
68+
69+
/** Gets a string that is being replaced by this call. */
70+
string getAReplacedString() {
71+
result = this.getPatternRegExp().getRegExpTerm().getAMatchedString()
72+
or
73+
result = this.getPatternString()
74+
}
75+
76+
/** Holds if this call to `replace` replaces `old` with `new`. */
77+
predicate replaces(string old, string new) {
78+
old = this.getAReplacedString() and
79+
new = this.getReplacementString()
80+
// TODO: handle block-variant of the call
81+
}
82+
}
883

984
/**
1085
* Provides flow summaries for the `String` class.

ruby/ql/lib/codeql/ruby/regexp/RegExpTreeView.qll

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,23 @@ class RegExpTerm extends RegExpParent {
246246
)
247247
}
248248

249+
/**
250+
* Gets the single string this regular-expression term matches.
251+
*
252+
* This predicate is only defined for (sequences/groups of) constant regular
253+
* expressions. In particular, terms involving zero-width assertions like `^`
254+
* or `\b` are not considered to have a constant value.
255+
*
256+
* Note that this predicate does not take flags of the enclosing
257+
* regular-expression literal into account.
258+
*/
259+
string getConstantValue() { none() }
260+
261+
/**
262+
* Gets a string that is matched by this regular-expression term.
263+
*/
264+
string getAMatchedString() { result = this.getConstantValue() }
265+
249266
/** Gets the primary QL class for this term. */
250267
override string getAPrimaryQlClass() { result = "RegExpTerm" }
251268
}
@@ -406,6 +423,19 @@ class RegExpSequence extends RegExpTerm, TRegExpSequence {
406423
)
407424
}
408425

426+
override string getConstantValue() { result = this.getConstantValue(0) }
427+
428+
/**
429+
* Gets the single string matched by the `i`th child and all following
430+
* children of this sequence, if any.
431+
*/
432+
private string getConstantValue(int i) {
433+
i = this.getNumChild() and
434+
result = ""
435+
or
436+
result = this.getChild(i).getConstantValue() + this.getConstantValue(i + 1)
437+
}
438+
409439
override string getAPrimaryQlClass() { result = "RegExpSequence" }
410440
}
411441

@@ -466,6 +496,11 @@ class RegExpAlt extends RegExpTerm, TRegExpAlt {
466496
)
467497
}
468498

499+
/** Gets an alternative of this term. */
500+
RegExpTerm getAlternative() { result = this.getAChild() }
501+
502+
override string getAMatchedString() { result = this.getAlternative().getAMatchedString() }
503+
469504
override string getAPrimaryQlClass() { result = "RegExpAlt" }
470505
}
471506

@@ -611,6 +646,10 @@ class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
611646
)
612647
}
613648

649+
override string getAMatchedString() {
650+
not this.isInverted() and result = this.getAChild().getAMatchedString()
651+
}
652+
614653
override string getAPrimaryQlClass() { result = "RegExpCharacterClass" }
615654
}
616655

@@ -719,6 +758,8 @@ class RegExpConstant extends RegExpTerm {
719758

720759
override RegExpTerm getChild(int i) { none() }
721760

761+
override string getConstantValue() { result = this.getValue() }
762+
722763
override string getAPrimaryQlClass() { result = "RegExpConstant" }
723764
}
724765

@@ -762,6 +803,10 @@ class RegExpGroup extends RegExpTerm, TRegExpGroup {
762803
re.groupContents(start, end, result.getStart(), result.getEnd())
763804
}
764805

806+
override string getConstantValue() { result = this.getAChild().getConstantValue() }
807+
808+
override string getAMatchedString() { result = this.getAChild().getAMatchedString() }
809+
765810
override string getAPrimaryQlClass() { result = "RegExpGroup" }
766811
}
767812

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: newQuery
3+
---
4+
* Added a new query, `rb/incomplete-sanitization`. The query finds string transformations that do not replace or escape all occurrences of a meta-character.
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
Sanitizing untrusted input is a common technique for preventing injection attacks such as SQL
9+
injection or cross-site scripting. Usually, this is done by escaping meta-characters such as quotes
10+
in a domain-specific way so that they are treated as normal characters.
11+
</p>
12+
<p>
13+
However, directly using the <code>String#sub</code> method to perform escaping is notoriously
14+
error-prone. Common mistakes include only replacing the first occurrence of a meta-character, or
15+
backslash-escaping various meta-characters but not the backslash itself.
16+
</p>
17+
<p>
18+
In the former case, later meta-characters are left undisturbed and can be used to subvert the
19+
sanitization. In the latter case, preceding a meta-character with a backslash leads to the backslash
20+
being escaped, but the meta-character appearing un-escaped, which again makes the sanitization
21+
ineffective.
22+
</p>
23+
<p>
24+
Even if the escaped string is not used in a security-critical context, incomplete escaping may still
25+
have undesirable effects, such as badly rendered or confusing output.
26+
</p>
27+
</overview>
28+
29+
<recommendation>
30+
<p>
31+
Use a (well-tested) sanitization library if at all possible. These libraries are much more likely to
32+
handle corner cases correctly than a custom implementation.
33+
</p>
34+
35+
<p>
36+
An even safer alternative is to design the application so that sanitization is not needed.
37+
Otherwise, make sure to use <code>String#gsub</code> rather than <code>String#sub</code>, to ensure
38+
that all occurrences are replaced, and remember to escape backslashes if applicable.
39+
</p>
40+
<p>
41+
Note, however, that this is generally <i>not</i> sufficient for replacing multi-character strings:
42+
the <code>String#gsub</code> method performs only one pass over the input string, and will not
43+
replace further instances of the string that result from earlier replacements.
44+
</p>
45+
<p>
46+
For example, consider the code snippet <code>s.gsub /\/\.\.\//, ""</code>, which attempts to strip
47+
out all occurences of <code>/../</code> from <code>s</code>. This will not work as expected: for the
48+
string <code>/./.././</code>, for example, it will remove the single occurrence of <code>/../</code>
49+
in the middle, but the remainder of the string then becomes <code>/../</code>, which is another
50+
instance of the substring we were trying to remove.
51+
</p>
52+
</recommendation>
53+
54+
<example>
55+
<p>
56+
As an example, assume that we want to embed a user-controlled string <code>account_number</code> into
57+
a SQL query as part of a string literal. To avoid SQL injection, we need to ensure that the string
58+
does not contain un-escaped single-quote characters. The following method attempts to ensure this by
59+
doubling single quotes, and thereby escaping them:
60+
</p>
61+
62+
<sample src="examples/IncompleteSanitization.rb" />
63+
64+
<p>
65+
As written, this sanitizer is ineffective: <code>String#sub</code> will replace only the
66+
<i>first</i> occurrence of that string.
67+
</p>
68+
69+
<p>
70+
As mentioned above, the method <code>escape_quotes</code> should be replaced with a purpose-built
71+
sanitizer, such as <code>ActiveRecord::Base::sanitize_sql</code> in Rails, or by using ORM methods
72+
that automatically sanitize parameters.
73+
</p>
74+
75+
<p>
76+
If this is not an option, <code>escape_quotes</code> should be rewritten to use the
77+
<code>String#gsub</code> method instead:
78+
</p>
79+
80+
<sample src="examples/IncompleteSanitizationGood.rb" />
81+
</example>
82+
83+
<references>
84+
<li>OWASP Top 10: <a href="https://www.owasp.org/index.php/Top_10-2017_A1-Injection">A1 Injection</a>.</li>
85+
<li>Rails: <a href="https://api.rubyonrails.org/classes/ActiveRecord/Sanitization/ClassMethods.html">ActiveRecord::Base::sanitize_sql</a>.</li>
86+
</references>
87+
</qhelp>

0 commit comments

Comments
 (0)