Skip to content

Commit bbb8177

Browse files
committed
Ruby: add rc/incomplete-sanitization query
1 parent df2cc18 commit bbb8177

File tree

13 files changed

+756
-1
lines changed

13 files changed

+756
-1
lines changed

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPublic.qll

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,20 @@ class Node extends TNode {
3939
}
4040

4141
/**
42-
* Gets a local source node from which data may flow to this node in zero or more local data-flow steps.
42+
* Gets a local source node from which data may flow to this node in zero or
43+
* more local data-flow steps.
4344
*/
4445
LocalSourceNode getALocalSource() { result.flowsTo(this) }
46+
47+
/**
48+
* Gets a data flow node from which data may flow to this node in one local step.
49+
*/
50+
Node getAPredecessor() { localFlowStep(result, this) }
51+
52+
/**
53+
* Gets a data flow node to which data may flow from this node in one local step.
54+
*/
55+
Node getASuccessor() { localFlowStep(this, result) }
4556
}
4657

4758
/** A data-flow node corresponding to a call in the control-flow graph. */

ruby/ql/lib/codeql/ruby/frameworks/core/String.qll

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,83 @@ private import codeql.ruby.ApiGraphs
55
private import codeql.ruby.DataFlow
66
private import codeql.ruby.dataflow.FlowSummary
77
private import codeql.ruby.dataflow.internal.DataFlowDispatch
8+
private import codeql.ruby.controlflow.CfgNodes
9+
private import codeql.ruby.Regexp as RE
10+
11+
/**
12+
* A call to a string substitution method, i.e. `String#sub`, `String#sub!`,
13+
* `String#gsub`, or `String#gsub!`.
14+
*
15+
* We heuristically include any call to a method matching the above names,
16+
* provided it has exactly two arguments and a receiver.
17+
*/
18+
class StringSubstitutionCall extends DataFlow::Node {
19+
private ExprNodes::MethodCallCfgNode exprNode;
20+
21+
StringSubstitutionCall() {
22+
this.asExpr() = exprNode and
23+
exprNode.getExpr().getMethodName() = ["sub", "sub!", "gsub", "gsub!"] and
24+
exists(exprNode.getReceiver()) and
25+
exprNode.getNumberOfArguments() = 2
26+
}
27+
28+
/**
29+
* Holds if this is a global substitution, i.e. this is a call to `gsub` or
30+
* `gsub!`.
31+
*/
32+
predicate isGlobal() { exprNode.getExpr().getMethodName() = ["gsub", "gsub!"] }
33+
34+
/**
35+
* Holds if this is a destructive substitution, i.e. this is a call to `sub!`
36+
* or `gsub!`.
37+
*/
38+
predicate isDestructive() { exprNode.getExpr().getMethodName() = ["sub!", "gsub!"] }
39+
40+
/** Gets the receiver of this call. */
41+
DataFlow::Node getReceiver() { result.asExpr() = exprNode.getReceiver() }
42+
43+
/** Gets the first argument to this call. */
44+
DataFlow::Node getPatternArgument() { result.asExpr() = exprNode.getArgument(0) }
45+
46+
/** Gets the second argument to this call. */
47+
DataFlow::Node getReplacementArgument() { result.asExpr() = exprNode.getArgument(1) }
48+
49+
/**
50+
* Gets the regular expression passed as the first (pattern) argument in this
51+
* call, if any.
52+
*/
53+
RE::RegExpPatternSource getPatternRegExp() {
54+
// TODO: using local flow means we miss regexps defined as constants outside
55+
// of the function scope.
56+
result.(DataFlow::LocalSourceNode).flowsTo(this.getPatternArgument())
57+
}
58+
59+
/**
60+
* Gets the string value passed as the first (pattern) argument in this call,
61+
* if any.
62+
*/
63+
string getPatternString() { result = exprNode.getArgument(0).getConstantValue().getString() }
64+
65+
/**
66+
* Gets the string value passed as the second (replacement) argument in this
67+
* call, if any.
68+
*/
69+
string getReplacementString() { result = exprNode.getArgument(1).getConstantValue().getString() }
70+
71+
/** Gets a string that is being replaced by this call. */
72+
string getAReplacedString() {
73+
result = this.getPatternRegExp().getRegExpTerm().getAMatchedString()
74+
or
75+
result = this.getPatternString()
76+
}
77+
78+
/** Holds if this call to `replace` replaces `old` with `new`. */
79+
predicate replaces(string old, string new) {
80+
old = this.getAReplacedString() and
81+
new = this.getReplacementString()
82+
// TODO: handle block-variant of the call
83+
}
84+
}
885

986
/**
1087
* Provides flow summaries for the `String` class.

ruby/ql/lib/codeql/ruby/regexp/RegExpTreeView.qll

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,23 @@ class RegExpTerm extends RegExpParent {
246246
)
247247
}
248248

249+
/**
250+
* Gets the single string this regular-expression term matches.
251+
*
252+
* This predicate is only defined for (sequences/groups of) constant regular
253+
* expressions. In particular, terms involving zero-width assertions like `^`
254+
* or `\b` are not considered to have a constant value.
255+
*
256+
* Note that this predicate does not take flags of the enclosing
257+
* regular-expression literal into account.
258+
*/
259+
string getConstantValue() { none() }
260+
261+
/**
262+
* Gets a string that is matched by this regular-expression term.
263+
*/
264+
string getAMatchedString() { result = this.getConstantValue() }
265+
249266
/** Gets the primary QL class for this term. */
250267
override string getAPrimaryQlClass() { result = "RegExpTerm" }
251268
}
@@ -406,6 +423,19 @@ class RegExpSequence extends RegExpTerm, TRegExpSequence {
406423
)
407424
}
408425

426+
override string getConstantValue() { result = this.getConstantValue(0) }
427+
428+
/**
429+
* Gets the single string matched by the `i`th child and all following
430+
* children of this sequence, if any.
431+
*/
432+
private string getConstantValue(int i) {
433+
i = this.getNumChild() and
434+
result = ""
435+
or
436+
result = this.getChild(i).getConstantValue() + this.getConstantValue(i + 1)
437+
}
438+
409439
override string getAPrimaryQlClass() { result = "RegExpSequence" }
410440
}
411441

@@ -466,6 +496,11 @@ class RegExpAlt extends RegExpTerm, TRegExpAlt {
466496
)
467497
}
468498

499+
/** Gets an alternative of this term. */
500+
RegExpTerm getAlternative() { result = this.getAChild() }
501+
502+
override string getAMatchedString() { result = this.getAlternative().getAMatchedString() }
503+
469504
override string getAPrimaryQlClass() { result = "RegExpAlt" }
470505
}
471506

@@ -611,6 +646,10 @@ class RegExpCharacterClass extends RegExpTerm, TRegExpCharacterClass {
611646
)
612647
}
613648

649+
override string getAMatchedString() {
650+
not this.isInverted() and result = this.getAChild().getAMatchedString()
651+
}
652+
614653
override string getAPrimaryQlClass() { result = "RegExpCharacterClass" }
615654
}
616655

@@ -719,6 +758,8 @@ class RegExpConstant extends RegExpTerm {
719758

720759
override RegExpTerm getChild(int i) { none() }
721760

761+
override string getConstantValue() { result = this.getValue() }
762+
722763
override string getAPrimaryQlClass() { result = "RegExpConstant" }
723764
}
724765

@@ -762,6 +803,10 @@ class RegExpGroup extends RegExpTerm, TRegExpGroup {
762803
re.groupContents(start, end, result.getStart(), result.getEnd())
763804
}
764805

806+
override string getConstantValue() { result = this.getAChild().getConstantValue() }
807+
808+
override string getAMatchedString() { result = this.getAChild().getAMatchedString() }
809+
765810
override string getAPrimaryQlClass() { result = "RegExpGroup" }
766811
}
767812

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
Sanitizing untrusted input is a common technique for preventing injection attacks such as SQL
9+
injection or cross-site scripting. Usually, this is done by escaping meta-characters such as quotes
10+
in a domain-specific way so that they are treated as normal characters.
11+
</p>
12+
<p>
13+
However, directly using the <code>string#sub</code> method to perform escaping is notoriously
14+
error-prone. Common mistakes include only replacing the first occurrence of a meta-character, or
15+
backslash-escaping various meta-characters but not the backslash itself.
16+
</p>
17+
<p>
18+
In the former case, later meta-characters are left undisturbed and can be used to subvert the
19+
sanitization. In the latter case, preceding a meta-character with a backslash leads to the backslash
20+
being escaped, but the meta-character appearing un-escaped, which again makes the sanitization
21+
ineffective.
22+
</p>
23+
<p>
24+
Even if the escaped string is not used in a security-critical context, incomplete escaping may still
25+
have undesirable effects, such as badly rendered or confusing output.
26+
</p>
27+
</overview>
28+
29+
<recommendation>
30+
<p>
31+
Use a (well-tested) sanitization library if at all possible. These libraries are much more likely to
32+
handle corner cases correctly than a custom implementation.
33+
</p>
34+
35+
<p>
36+
An even safer alternative is to design the application so that sanitization is not needed.
37+
Otherwise, make sure to use <code>string#gsub</code> rather than <code>string#sub</code>, to ensure
38+
that all occurrences are replaced, and remember to escape backslashes if applicable.
39+
</p>
40+
<p>
41+
Note, however, that this is generally <i>not</i> sufficient for replacing multi-character strings:
42+
the <code>string#gsub</code> method performs only one pass over the input string, and will not
43+
replace further instances of the string that result from earlier replacements.
44+
</p>
45+
<p>
46+
For example, consider the code snippet <code>s.gsub /\/\.\.\//, ""</code>, which attempts to strip
47+
out all occurences of <code>/../</code> from <code>s</code>. This will not work as expected: for the
48+
string <code>/./.././</code>, for example, it will remove the single occurrence of <code>/../</code>
49+
in the middle, but the remainder of the string then becomes <code>/../</code>, which is another
50+
instance of the substring we were trying to remove.
51+
</p>
52+
</recommendation>
53+
54+
<example>
55+
<p>
56+
As an example, assume that we want to embed a user-controlled string <code>account_number</code> into
57+
a SQL query as part of a string literal. To avoid SQL injection, we need to ensure that the string
58+
does not contain un-escaped single-quote characters. The following method attempts to ensure this by
59+
doubling single quotes, and thereby escaping them:
60+
</p>
61+
62+
<sample src="examples/IncompleteSanitization.rb" />
63+
64+
<p>
65+
As written, this sanitizer is ineffective: <code>string#sub</code> will replace only the
66+
<i>first</i> occurrence of that string.
67+
</p>
68+
69+
<p>
70+
As mentioned above, the method <code>escape_quotes</code> should be replaced with a purpose-built
71+
sanitizer, such as <code>ActiveRecord::Base::sanitize_sql</code> in Rails, or by using ORM methods
72+
that automatically sanitize parameters.
73+
</p>
74+
75+
<p>
76+
If this is not an option, <code>escape_quotes</code> should be rewritten to use the
77+
<code>string#gsub</code> method instead:
78+
</p>
79+
80+
<sample src="examples/IncompleteSanitizationGood.rb" />
81+
</example>
82+
83+
<references>
84+
<li>OWASP Top 10: <a href="https://www.owasp.org/index.php/Top_10-2017_A1-Injection">A1 Injection</a>.</li>
85+
<li>Rails: <a href="https://api.rubyonrails.org/classes/ActiveRecord/Sanitization/ClassMethods.html">ActiveRecord::Base::sanitize_sql</a>.</li>
86+
</references>
87+
</qhelp>

0 commit comments

Comments
 (0)