Skip to content

Commit 4b53e1c

Browse files
authored
Merge pull request github#18304 from yoff/ruby/performance-queries
Ruby: Query for database calls in a loop
2 parents 6045d9b + 0912e3b commit 4b53e1c

File tree

8 files changed

+187
-3
lines changed

8 files changed

+187
-3
lines changed

ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -254,9 +254,8 @@ private Expr getUltimateReceiver(MethodCall call) {
254254
)
255255
}
256256

257-
// A call to `find`, `where`, etc. that may return active record model object(s)
258-
private class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode
259-
{
257+
/** A call to `find`, `where`, etc. that may return active record model object(s) */
258+
class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode {
260259
private ActiveRecordModelClass cls;
261260

262261
ActiveRecordModelFinderCall() {
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
When a database query operation, for example a call to a query method in the Rails `ActiveRecord::Relation` class, is executed in a loop, this can lead to a performance issue known as an "n+1 query problem".
9+
The database query will be executed in each iteration of the loop.
10+
Performance can usually be improved by performing a single database query outside of a loop, which retrieves all the required objects in a single operation.
11+
</p>
12+
</overview>
13+
<recommendation>
14+
<p>If possible, pull the database query out of the loop and rewrite it to retrieve all the required objects. This replaces multiple database operations with a single one.
15+
</p>
16+
</recommendation>
17+
<example>
18+
<p>The following (suboptimal) example code queries the <code>User</code> object in each iteration of the loop:</p>
19+
<sample src="examples/straight_loop.rb" />
20+
<p>To improve the performance, we instead query the <code>User</code> object once outside the loop, gathering all necessary information in a single query:</p>
21+
<sample src="examples/preload.rb" />
22+
</example>
23+
</qhelp>
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/**
2+
* @name Database query in a loop
3+
* @description Database queries in a loop can lead to an unnecessary amount of database calls and poor performance.
4+
* @kind problem
5+
* @problem.severity info
6+
* @precision high
7+
* @id rb/database-query-in-loop
8+
* @tags performance
9+
*/
10+
11+
import ruby
12+
private import codeql.ruby.AST
13+
import codeql.ruby.ast.internal.Constant
14+
import codeql.ruby.Concepts
15+
import codeql.ruby.frameworks.ActiveRecord
16+
private import codeql.ruby.TaintTracking
17+
private import codeql.ruby.CFG
18+
private import codeql.ruby.controlflow.internal.Guards as Guards
19+
20+
/** Gets the name of a built-in method that involves a loop operation. */
21+
string getALoopMethodName() {
22+
result in [
23+
"each", "reverse_each", "map", "map!", "foreach", "flat_map", "in_batches", "one?", "all?",
24+
"collect", "collect!", "select", "select!", "reject", "reject!"
25+
]
26+
}
27+
28+
/** A loop, represented by a call to a loop operation. */
29+
class LoopingCall extends DataFlow::CallNode {
30+
Callable loopScope;
31+
32+
LoopingCall() {
33+
this.getMethodName() = getALoopMethodName() and
34+
loopScope = this.getBlock().asCallable().asCallableAstNode()
35+
}
36+
37+
/** Holds if `c` is executed as part of the body of this loop. */
38+
predicate executesCall(DataFlow::CallNode c) { c.asExpr().getScope() = loopScope }
39+
}
40+
41+
/** Holds if `ar` influences a guard that may control the execution of a loop. */
42+
predicate usedInLoopControlGuard(ActiveRecordInstance ar) {
43+
exists(DataFlow::Node insideGuard, CfgNodes::ExprCfgNode guard |
44+
// For a guard like `cond && ar`, the whole guard will not be tainted
45+
// so we need to look at the taint of the individual parts.
46+
insideGuard.asExpr().getExpr() = guard.getExpr().getAChild*()
47+
|
48+
TaintTracking::localTaint(ar, insideGuard) and
49+
guardForLoopControl(guard, _)
50+
)
51+
}
52+
53+
/** Holds if `guard` controls `break` and `break` would break out of a loop. */
54+
predicate guardForLoopControl(CfgNodes::ExprCfgNode guard, CfgNodes::AstCfgNode break) {
55+
Guards::guardControlsBlock(guard, break.getBasicBlock(), _) and
56+
(
57+
break.(CfgNodes::ExprNodes::MethodCallCfgNode).getMethodName() = "raise"
58+
or
59+
break instanceof CfgNodes::ReturningCfgNode
60+
)
61+
}
62+
63+
from LoopingCall loop, ActiveRecordModelFinderCall call
64+
where
65+
loop.executesCall(call) and
66+
// Disregard loops over constants
67+
not isArrayConstant(loop.getReceiver().asExpr(), _) and
68+
// Disregard cases where the looping is influenced by the query result
69+
not usedInLoopControlGuard(call) and
70+
// Only report calls that are likely to be expensive
71+
not call.getMethodName() in ["new", "create"]
72+
select call,
73+
"This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop.",
74+
loop, "this loop"
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Preload User data
2+
user_data = User.where(login: repo_names_by_owner.keys).pluck(:login, :id, :type).to_h do |login, id, type|
3+
[login, { id: id, type: type == "User" ? "USER" : "ORGANIZATION" }]
4+
end
5+
6+
repo_names_by_owner.each do |owner_slug, repo_names|
7+
owner_info = user_data[owner_slug]
8+
owner_id = owner_info[:id]
9+
owner_type = owner_info[:type]
10+
rel_conditions = { owner_id: owner_id, name: repo_names }
11+
12+
nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg
13+
nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg
14+
end
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
repo_names_by_owner.map do |owner_slug, repo_names|
2+
owner_id, owner_type = User.where(login: owner_slug).pluck(:id, :type).first
3+
owner_type = owner_type == "User" ? "USER" : "ORGANIZATION"
4+
rel_conditions = { owner_id: owner_id, name: repo_names }
5+
6+
nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg
7+
nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg
8+
end
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
| DatabaseQueryInLoop.rb:11:13:11:52 | call to first | This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop. | DatabaseQueryInLoop.rb:10:9:12:11 | call to map | this loop |
2+
| DatabaseQueryInLoop.rb:16:20:16:59 | call to first | This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop. | DatabaseQueryInLoop.rb:15:9:21:11 | call to map | this loop |
3+
| DatabaseQueryInLoop.rb:19:17:19:56 | call to first | This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop. | DatabaseQueryInLoop.rb:18:13:20:15 | call to map | this loop |
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
query: queries/performance/DatabaseQueryInLoop.ql
2+
postprocess: utils/test/InlineExpectationsTestQuery.ql
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
2+
class User < ActiveRecord::Base
3+
end
4+
5+
class DatabaseQueryInLoopTest
6+
def test
7+
### These are bad
8+
9+
# simple query in loop
10+
names.map do |name|
11+
User.where(login: name).pluck(:id).first # $ Alert
12+
end
13+
14+
# nested loop
15+
names.map do |name|
16+
user = User.where(login: name).pluck(:id).first # $ Alert
17+
18+
ids.map do |user_id|
19+
User.where(id: user_id).pluck(:id).first # $ Alert
20+
end
21+
end
22+
23+
### These are OK
24+
25+
# Not in loop
26+
User.where(login: owner_slug).pluck(:id).first
27+
28+
# Loops over constant array
29+
%w(first-name second-name).map { |name| User.where(login: name).pluck(:id).first }
30+
31+
constant_names = [first-name, second-name]
32+
constant_names.each do |name|
33+
User.where(login: name).pluck(:id).first
34+
end
35+
36+
# Loop traversal is influenced by query result
37+
# raising an exception if the user is not found
38+
names.map do |name|
39+
user = User.where(login: name).pluck(:id).first
40+
unless user
41+
raise Error.new("User '#{name}' not found")
42+
end
43+
end
44+
45+
# more complicated condition
46+
names.map do |name|
47+
user = User.where(login: name).pluck(:id).first
48+
unless cond && user
49+
raise Error.new("User '#{name}' not found")
50+
end
51+
end
52+
53+
# skipping through the loop when users are not relevant
54+
names.map do |name|
55+
user = User.where(login: name).pluck(:id).first
56+
if not isRelevant(user)
57+
next
58+
end
59+
end
60+
end
61+
end

0 commit comments

Comments
 (0)