Skip to content

Commit ef6f0e5

Browse files
committed
Ruby: Add Improper Memoization query
This query finds cases where a method memoizes its result but fails to include one or more of its parameters in the memoization key (or doesn't use memoization keys at all). This can lead to the method returning incorrect results when subsequently called with different arguments.
1 parent 7c5a838 commit ef6f0e5

File tree

6 files changed

+326
-0
lines changed

6 files changed

+326
-0
lines changed
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/**
2+
* Provides predicates for reasoning about improper memoization methods.
3+
*/
4+
5+
import ruby
6+
import codeql.ruby.DataFlow
7+
import codeql.ruby.dataflow.internal.DataFlowDispatch
8+
9+
/**
10+
* A `||=` statement that memoizes a value by storing it in an instance variable.
11+
*/
12+
private class MemoStmt extends AssignLogicalOrExpr {
13+
private InstanceVariableAccess instanceVariable;
14+
15+
MemoStmt() { instanceVariable.getParent*() = this.getLeftOperand() }
16+
17+
/**
18+
* Gets the variable access on the LHS of this statement.
19+
* This is the `@a` in `@a ||= e`.
20+
*/
21+
VariableAccess getVariableAccess() { result = instanceVariable }
22+
}
23+
24+
/**
25+
* A `||=` statement that stores a value at a particular location in a Hash,
26+
* which itself is stored in an instance variable.
27+
* For example:
28+
* ```rb
29+
* @a[key] ||= e
30+
* ```
31+
*/
32+
private class HashMemoStmt extends MemoStmt {
33+
HashMemoStmt() {
34+
exists(ElementReference e |
35+
e = this.getLeftOperand() and this.getVariableAccess().getParent+() = e
36+
)
37+
}
38+
}
39+
40+
/**
41+
* A method that may be performing memoization.
42+
*/
43+
private class MemoCandidate extends Method {
44+
MemoCandidate() { exists(MemoStmt m | m.getEnclosingMethod() = this) }
45+
}
46+
47+
/**
48+
* Holds if parameter `p` of `m` is read in the right hand side of `assign`.
49+
*/
50+
private predicate parameterUsedInMemoValue(Method m, Parameter p, MemoStmt a) {
51+
p = m.getAParameter() and
52+
a.getParent+() = m and
53+
p.getAVariable().getAnAccess().getParent*() = a.getRightOperand()
54+
}
55+
56+
/**
57+
* Holds if parameter `p` of `m` is read in the left hand side of `assign`.
58+
*/
59+
private predicate parameterUsedInMemoKey(Method m, Parameter p, HashMemoStmt a) {
60+
p = m.getAParameter() and
61+
a.getParent+() = m and
62+
p.getAVariable().getAnAccess().getParent+() = a.getLeftOperand()
63+
}
64+
65+
/**
66+
* Holds if the assignment `s` is returned from its parent method `m`.
67+
*/
68+
private predicate memoReturnedFromMethod(Method m, MemoStmt s) {
69+
exists(DataFlow::Node n | n.asExpr().getExpr() = s and exprNodeReturnedFrom(n, m))
70+
or
71+
// If we don't have flow (e.g. due to the dataflow library not supporting instance variable flow yet),
72+
// fall back to a syntactic heuristic: does the last statement in the method mention the memoization variable?
73+
m.getLastStmt().getAChild*().(InstanceVariableReadAccess).getVariable() =
74+
s.getVariableAccess().getVariable()
75+
}
76+
77+
/**
78+
* Holds if `m` is a memoization method with a parameter `p` which is not used in the memoization key.
79+
* This can cause stale or incorrect values to be returned when the method is called with different arguments.
80+
*/
81+
predicate isImproperMemoizationMethod(Method m, Parameter p, AssignLogicalOrExpr s) {
82+
m.getName() != "initialize" and
83+
parameterUsedInMemoValue(m, p, s) and
84+
not parameterUsedInMemoKey(m, p, s) and
85+
memoReturnedFromMethod(m, s)
86+
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
<overview>
6+
<p>
7+
A common pattern in Ruby is to memoize the result of a method by storing
8+
it in an instance variable. If the method has no parameters, it can simply
9+
be stored directly in a variable.
10+
</p>
11+
12+
<sample language="ruby">
13+
def answer
14+
@answer ||= calculate_answer
15+
end
16+
</sample>
17+
18+
<p>
19+
If the method takes parameters, then there are multiple results to store
20+
(one for each combination of parameter value). In this case the values
21+
should be stored in a hash, keyed by the parameter values.
22+
</p>
23+
24+
<sample language="ruby">
25+
def answer(x, y)
26+
@answer ||= {}
27+
@answer[x] ||= {}
28+
@answer[x][y] ||= calculate_answer
29+
end
30+
</sample>
31+
32+
<p>
33+
If a memoization method takes parameters but does not include them in the
34+
memoization key, subsequent calls to the method with different parameter
35+
values may incorrectly return the same result. This can lead to the method
36+
returning stale data, or leaking sensitive information.
37+
</p>
38+
</overview>
39+
40+
<example>
41+
<p>
42+
In this example, the method does not include its parameters in the
43+
memoization key. The first call to this method will cache the result, and
44+
subsequent calls will return the same result regardless of what arguments
45+
are given.
46+
</p>
47+
48+
<sample language="ruby">
49+
def answer(x, y)
50+
@answer ||= calculate_answer(x, y)
51+
end
52+
</sample>
53+
54+
<p>
55+
This can be fixed by storing the result of <code>calculate_answer</code>
56+
in a hash, keyed by the parameter values.
57+
</p>
58+
59+
<sample language="ruby">
60+
def answer(x, y)
61+
@answer ||= {}
62+
@answer[x] ||= {}
63+
@answer[x][y] ||= calculate_answer(x, y)
64+
end
65+
</sample>
66+
67+
<p>
68+
Note that if the result of <code>calculate_answer</code> is
69+
<code>false</code> or <code>nil</code>, then it will not be cached.
70+
To cache these values you can use a different pattern:
71+
</p>
72+
73+
<sample language="ruby">
74+
def answer(x, y)
75+
@answer ||= Hash.new do |h1, x|
76+
h1[x] = Hash.new do |h2, y|
77+
h2[y] = calculate_answer(x, y)
78+
end
79+
end
80+
@answer[x][y]
81+
end
82+
</sample>
83+
</example>
84+
</qhelp>
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/**
2+
* @name Improper Memoization
3+
* @description Omitting a parameter from the key of a memoization method can lead to reading stale or incorrect data.
4+
* @kind problem
5+
* @problem.severity warning
6+
* @precision high
7+
* @tags security
8+
* @id rb/improper-memoization
9+
*/
10+
11+
import ruby
12+
import codeql.ruby.security.ImproperMemoizationQuery
13+
14+
from Method m, Parameter p, AssignLogicalOrExpr s
15+
where isImproperMemoizationMethod(m, p, s)
16+
select m, "A $@ in this memoization method does not form part of the memoization key.", p,
17+
"parameter"
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
failures
2+
| improper_memoization.rb:100:1:104:3 | m14 | Unexpected result: result=BAD |
3+
#select
4+
| improper_memoization.rb:50:1:55:3 | m7 | improper_memoization.rb:50:8:50:10 | arg | improper_memoization.rb:51:3:53:5 | ... \|\|= ... |
5+
| improper_memoization.rb:58:1:63:3 | m8 | improper_memoization.rb:58:8:58:10 | arg | improper_memoization.rb:59:3:61:5 | ... \|\|= ... |
6+
| improper_memoization.rb:66:1:68:3 | m9 | improper_memoization.rb:66:8:66:10 | arg | improper_memoization.rb:67:3:67:34 | ... \|\|= ... |
7+
| improper_memoization.rb:71:1:73:3 | m10 | improper_memoization.rb:71:9:71:12 | arg1 | improper_memoization.rb:72:3:72:42 | ... \|\|= ... |
8+
| improper_memoization.rb:71:1:73:3 | m10 | improper_memoization.rb:71:15:71:18 | arg2 | improper_memoization.rb:72:3:72:42 | ... \|\|= ... |
9+
| improper_memoization.rb:76:1:79:3 | m11 | improper_memoization.rb:76:15:76:18 | arg2 | improper_memoization.rb:78:3:78:48 | ... \|\|= ... |
10+
| improper_memoization.rb:82:1:87:3 | m12 | improper_memoization.rb:82:15:82:18 | arg2 | improper_memoization.rb:83:3:85:5 | ... \|\|= ... |
11+
| improper_memoization.rb:90:1:97:3 | m13 | improper_memoization.rb:90:9:90:10 | id | improper_memoization.rb:91:3:95:5 | ... \|\|= ... |
12+
| improper_memoization.rb:100:1:104:3 | m14 | improper_memoization.rb:100:9:100:11 | arg | improper_memoization.rb:103:3:103:40 | ... \|\|= ... |
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import ruby
2+
import TestUtilities.InlineExpectationsTest
3+
import codeql.ruby.security.ImproperMemoizationQuery
4+
5+
class ImproperMemoizationTest extends InlineExpectationsTest {
6+
ImproperMemoizationTest() { this = "ImproperMemoizationTest" }
7+
8+
override string getARelevantTag() { result = "BAD" }
9+
10+
override predicate hasActualResult(Location location, string element, string tag, string value) {
11+
tag = "result" and
12+
value = "BAD" and
13+
exists(Expr e |
14+
isImproperMemoizationMethod(e, _, _) and
15+
location = e.getLocation() and
16+
element = e.toString()
17+
)
18+
}
19+
}
20+
21+
from Method m, Parameter p, AssignLogicalOrExpr s
22+
where isImproperMemoizationMethod(m, p, s)
23+
select m, p, s
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
# GOOD - Should not trigger CodeQL rule
2+
3+
# No arguments passed to method
4+
def m1
5+
@m1 ||= long_running_method
6+
end
7+
8+
# No arguments passed to method
9+
def m2
10+
@m2 ||= begin
11+
long_running_method
12+
end
13+
end
14+
15+
# OK: argument used in key.
16+
# May be incorrect if arg is `false` or `nil`.
17+
def m3(arg)
18+
@m3 ||= {}
19+
@m3[arg] ||= long_running_method(arg)
20+
end
21+
22+
# OK: both arguments used in key.
23+
# May be incorrect if either arg is `false` or `nil`.
24+
def m4(arg1, arg2)
25+
@m4 ||= {}
26+
@m4[[arg1, arg2]] ||= result(arg1, arg2)
27+
end
28+
29+
# OK: argument used in key.
30+
# Still correct if arg is `false` or `nil`.
31+
def m5(arg)
32+
@m5 ||= Hash.new do |h1, key|
33+
h1[key] = long_running_method(key)
34+
end
35+
@m5[arg]
36+
end
37+
38+
# OK: both arguments used in key.
39+
# Still correct if either arg is `false` or `nil`.
40+
def m6(arg1, arg2)
41+
@m6 ||= Hash.new do |h1, arg1|
42+
h1[arg1] = Hash.new do |h2, arg2|
43+
h2[arg2] = result(arg1, arg2)
44+
end
45+
end
46+
@m6[arg1][arg2]
47+
end
48+
49+
# Bad: method has parameter but only one result is memoized.
50+
def m7(arg) # $result=BAD
51+
@m7 ||= begin
52+
arg += 3
53+
end
54+
@m7
55+
end
56+
57+
# Bad: method has parameter but only one result is memoized.
58+
def m8(arg) # $result=BAD
59+
@m8 ||= begin
60+
long_running_method(arg)
61+
end
62+
@m8
63+
end
64+
65+
# Bad: method has parameter but only one result is memoized.
66+
def m9(arg) # $result=BAD
67+
@m9 ||= long_running_method(arg)
68+
end
69+
70+
# Bad: method has parameter but only one result is memoized.
71+
def m10(arg1, arg2) # $result=BAD
72+
@m10 ||= long_running_method(arg1, arg2)
73+
end
74+
75+
# Bad: `arg2` not used in key.
76+
def m11(arg1, arg2) # $result=BAD
77+
@m11 ||= {}
78+
@m11[arg1] ||= long_running_method(arg1, arg2)
79+
end
80+
81+
# Bad: `arg2` not used in key.
82+
def m12(arg1, arg2) # $result=BAD
83+
@m12 ||= Hash.new do |h1, arg1|
84+
h1[arg1] = result(arg1, arg2)
85+
end
86+
@m12[arg1]
87+
end
88+
89+
# Bad: arg not used in key.
90+
def m13(id:) # $result=BAD
91+
@m13 ||= Rails.cache.fetch("product_sku/#{id}", expires_in: 30.minutes) do
92+
ActiveRecord::Base.transaction do
93+
ProductSku.find_by(id: id)
94+
end
95+
end
96+
@m13
97+
end
98+
99+
# Good (FP): arg is used in key via string interpolation.
100+
def m14(arg)
101+
@m14 ||= {}
102+
key = "foo/#{arg}"
103+
@m14[key] ||= long_running_method(arg)
104+
end

0 commit comments

Comments
 (0)