Skip to content

Commit acf28eb

Browse files
committed
add a RegexExecution, and use it to track regular expressions to their uses in a nice way in rb/polynomial-redos
1 parent 6e33dd5 commit acf28eb

File tree

5 files changed

+204
-82
lines changed

5 files changed

+204
-82
lines changed

ruby/ql/lib/codeql/ruby/Concepts.qll

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ private import codeql.ruby.DataFlow
1010
private import codeql.ruby.Frameworks
1111
private import codeql.ruby.dataflow.RemoteFlowSources
1212
private import codeql.ruby.ApiGraphs
13+
private import codeql.ruby.Regexp as RE
1314

1415
/**
1516
* A data-flow node that constructs a SQL statement.
@@ -77,6 +78,55 @@ module SqlExecution {
7778
}
7879
}
7980

81+
/**
82+
* A data-flow node that executes a regular expression.
83+
*
84+
* Extend this class to refine existing API models. If you want to model new APIs,
85+
* extend `RegexExecution::Range` instead.
86+
*/
87+
class RegexExecution extends DataFlow::Node instanceof RegexExecution::Range {
88+
/** Gets the data flow node for the regex being executed by this node. */
89+
DataFlow::Node getRegex() { result = super.getRegex() }
90+
91+
/** Gets a data flow node for the string to be searched or matched against. */
92+
DataFlow::Node getString() { result = super.getString() }
93+
94+
/** Gets a parsed regular expression term that is executed at this node. */
95+
RE::RegExpTerm getTerm() { result = super.getTerm() }
96+
97+
/**
98+
* Gets the name of this regex execution, typically the name of an executing method.
99+
* This is used for nice alert messages and should include the module if possible.
100+
*/
101+
string getName() { result = super.getName() }
102+
}
103+
104+
/** Provides classes for modeling new regular-expression execution APIs. */
105+
module RegexExecution {
106+
/**
107+
* A data-flow node that executes a regular expression.
108+
*
109+
* Extend this class to model new APIs. If you want to refine existing API models,
110+
* extend `RegexExecution` instead.
111+
*/
112+
abstract class Range extends DataFlow::Node {
113+
/** Gets the data flow node for the regex being executed by this node. */
114+
abstract DataFlow::Node getRegex();
115+
116+
/** Gets a data flow node for the string to be searched or matched against. */
117+
abstract DataFlow::Node getString();
118+
119+
/** Gets the parsed regular expression term that is executed by this node. */
120+
abstract RE::RegExpTerm getTerm();
121+
122+
/**
123+
* Gets the name of this regex execution, typically the name of an executing method.
124+
* This is used for nice alert messages and should include the module if possible.
125+
*/
126+
abstract string getName();
127+
}
128+
}
129+
80130
/**
81131
* A data flow node that performs a file system access, including reading and writing data,
82132
* creating and deleting files and folders, checking and updating permissions, and so on.

ruby/ql/lib/codeql/ruby/Regexp.qll

Lines changed: 93 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,11 @@
88
import regexp.RegExpTreeView // re-export
99
private import regexp.internal.ParseRegExp
1010
private import regexp.internal.RegExpConfiguration
11-
private import codeql.ruby.ast.Literal as Ast
11+
private import codeql.ruby.AST as Ast
12+
private import codeql.ruby.CFG
1213
private import codeql.ruby.DataFlow
1314
private import codeql.ruby.ApiGraphs
15+
private import codeql.ruby.Concepts
1416

1517
/**
1618
* Provides utility predicates related to regular expressions.
@@ -63,7 +65,11 @@ private class RegExpLiteralPatternSource extends RegExpPatternSource {
6365
private class StringRegExpPatternSource extends RegExpPatternSource {
6466
private DataFlow::Node parse;
6567

66-
StringRegExpPatternSource() { this = regExpSource(parse) }
68+
StringRegExpPatternSource() {
69+
this = regExpSource(parse) and
70+
// `regExpSource()` tracks both strings and regex literals, narrow it down to strings.
71+
this.asExpr().getConstantValue().isString(_)
72+
}
6773

6874
override DataFlow::Node getAParse() { result = parse }
6975

@@ -104,6 +110,7 @@ module RegExpInterpretation {
104110

105111
/**
106112
* A node interpreted as a regular expression.
113+
* Specifically, nodes where string values are interpreted as regular expressions.
107114
*/
108115
class StdLibRegExpInterpretation extends RegExpInterpretation::Range {
109116
StdLibRegExpInterpretation() {
@@ -128,3 +135,87 @@ cached
128135
DataFlow::Node regExpSource(DataFlow::Node re) {
129136
exists(RegExpConfiguration c | c.hasFlow(result, re))
130137
}
138+
139+
/**
140+
* Holds if `exec` is a node where `regexp` is interpreted as a regular expression and
141+
* tested against the string value of `input`.
142+
* `name` describes the regexp execution, typically the name of the method being called.
143+
*/
144+
private predicate regexExecution(
145+
DataFlow::Node exec, DataFlow::Node input, DataFlow::Node regexp, string name
146+
) {
147+
// `=~` or `!~`
148+
exists(CfgNodes::ExprNodes::BinaryOperationCfgNode op |
149+
name = op.getOperator() and
150+
exec.asExpr() = op and
151+
(
152+
op.getExpr() instanceof Ast::RegExpMatchExpr or
153+
op.getExpr() instanceof Ast::NoRegExpMatchExpr
154+
) and
155+
(
156+
input.asExpr() = op.getLeftOperand() and regexp.asExpr() = op.getRightOperand()
157+
or
158+
input.asExpr() = op.getRightOperand() and regexp.asExpr() = op.getLeftOperand()
159+
)
160+
)
161+
or
162+
// Any of the methods on `String` that take a regexp.
163+
exists(DataFlow::CallNode call | exec = call |
164+
name = "String#" + call.getMethodName() and
165+
call.getMethodName() =
166+
[
167+
"[]", "gsub", "gsub!", "index", "match", "match?", "partition", "rindex", "rpartition",
168+
"scan", "slice!", "split", "sub", "sub!"
169+
] and
170+
input = call.getReceiver() and
171+
regexp = call.getArgument(0) and
172+
// exclude https://ruby-doc.org/core-2.4.0/Regexp.html#method-i-match; those calls are handled in the next case of this disjunction
173+
// also see `StdLibRegExpInterpretation`
174+
not (
175+
call.getMethodName() = ["match", "match?"] and
176+
call.getReceiver() = trackRegexpType()
177+
)
178+
)
179+
or
180+
// A call to `match` or `match?` where the regexp is the receiver.
181+
exists(DataFlow::CallNode call | exec = call |
182+
name = "Regexp#" + call.getMethodName() and
183+
call.getMethodName() = ["match", "match?"] and
184+
regexp = call.getReceiver() and
185+
input = call.getArgument(0)
186+
)
187+
or
188+
// a case-when statement
189+
exists(CfgNodes::ExprNodes::CaseExprCfgNode caseWhen |
190+
name = "case-when" and
191+
exec.asExpr() = caseWhen and
192+
input.asExpr() = caseWhen.getValue()
193+
|
194+
regexp.asExpr() = caseWhen.getBranch(_).(CfgNodes::ExprNodes::WhenClauseCfgNode).getPattern(_)
195+
or
196+
regexp.asExpr() = caseWhen.getBranch(_).(CfgNodes::ExprNodes::InClauseCfgNode).getPattern()
197+
)
198+
}
199+
200+
/**
201+
* An execution of a regular expression by the standard library.
202+
*/
203+
private class StdRegexpExecution extends RegexExecution::Range {
204+
DataFlow::Node regexp;
205+
DataFlow::Node input;
206+
string name;
207+
208+
StdRegexpExecution() { regexExecution(this, input, regexp, name) }
209+
210+
override DataFlow::Node getRegex() { result = regexp }
211+
212+
override DataFlow::Node getString() { result = input }
213+
214+
override RegExpTerm getTerm() { result = getTermForNode(regexp) }
215+
216+
override string getName() { result = name }
217+
}
218+
219+
private RegExpTerm getTermForNode(DataFlow::Node node) {
220+
exists(RegExpPatternSource source | source = regExpSource(node) | result = source.getRegExpTerm())
221+
}

ruby/ql/lib/codeql/ruby/regexp/internal/RegExpConfiguration.qll

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,38 @@ private import codeql.ruby.controlflow.CfgNodes
66
private import codeql.ruby.dataflow.internal.DataFlowImplForRegExp
77
private import codeql.ruby.typetracking.TypeTracker
88
private import codeql.ruby.ApiGraphs
9+
private import codeql.ruby.Concepts
910
private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
1011
private import codeql.ruby.TaintTracking
1112
private import codeql.ruby.frameworks.core.String
1213

1314
class RegExpConfiguration extends Configuration {
1415
RegExpConfiguration() { this = "RegExpConfiguration" }
1516

16-
override predicate isSource(DataFlow::Node source) {
17+
override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) {
18+
// track both string literals and regexp literals - the latter for finding executions of regular expressions that are used elsewhere.
19+
state = "string" and
1720
source.asExpr() =
1821
any(ExprCfgNode e |
1922
e.getConstantValue().isString(_) and
2023
not e instanceof ExprNodes::VariableReadAccessCfgNode and
2124
not e instanceof ExprNodes::ConstantReadAccessCfgNode
2225
)
26+
or
27+
state = "reg" and
28+
source.asExpr().getExpr() instanceof Ast::RegExpLiteral
2329
}
2430

25-
override predicate isSink(DataFlow::Node sink) { sink instanceof RegExpInterpretation::Range }
31+
override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) {
32+
state = "string" and
33+
sink instanceof RegExpInterpretation::Range
34+
or
35+
state = "reg" and
36+
sink = any(RegexExecution exec).getRegex()
37+
}
2638

27-
override predicate isBarrier(DataFlow::Node node) {
39+
override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) {
40+
state = "string" and
2841
exists(DataFlow::CallNode mce | mce.getMethodName() = ["match", "match?"] |
2942
// receiver of https://ruby-doc.org/core-2.4.0/String.html#method-i-match
3043
node = mce.getReceiver() and
@@ -36,22 +49,29 @@ class RegExpConfiguration extends Configuration {
3649
)
3750
}
3851

39-
override predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
40-
// include taint flow through `String` summaries
41-
TaintTracking::localTaintStep(nodeFrom, nodeTo) and
42-
nodeFrom.(DataFlowPrivate::SummaryNode).getSummarizedCallable() instanceof
43-
String::SummarizedCallable
44-
or
45-
// string concatenations, and
46-
exists(CfgNodes::ExprNodes::OperationCfgNode op |
47-
op = nodeTo.asExpr() and
48-
op.getAnOperand() = nodeFrom.asExpr() and
49-
op.getExpr().(Ast::BinaryOperation).getOperator() = "+"
52+
override predicate isAdditionalFlowStep(
53+
DataFlow::Node nodeFrom, DataFlow::FlowState stateFrom, DataFlow::Node nodeTo,
54+
DataFlow::FlowState stateTo
55+
) {
56+
stateFrom = stateTo and
57+
stateFrom = "string" and
58+
(
59+
// include taint flow through `String` summaries
60+
TaintTracking::localTaintStep(nodeFrom, nodeTo) and
61+
nodeFrom.(DataFlowPrivate::SummaryNode).getSummarizedCallable() instanceof
62+
String::SummarizedCallable
63+
or
64+
// string concatenations, and
65+
exists(CfgNodes::ExprNodes::OperationCfgNode op |
66+
op = nodeTo.asExpr() and
67+
op.getAnOperand() = nodeFrom.asExpr() and
68+
op.getExpr().(Ast::BinaryOperation).getOperator() = "+"
69+
)
70+
or
71+
// string interpolations
72+
nodeFrom.asExpr() =
73+
nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
5074
)
51-
or
52-
// string interpolations
53-
nodeFrom.asExpr() =
54-
nodeTo.asExpr().(CfgNodes::ExprNodes::StringlikeLiteralCfgNode).getAComponent()
5575
}
5676
}
5777

ruby/ql/lib/codeql/ruby/security/regexp/PolynomialReDoSCustomizations.qll

Lines changed: 6 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ private import codeql.ruby.CFG
99
private import codeql.ruby.DataFlow
1010
private import codeql.ruby.dataflow.RemoteFlowSources
1111
private import codeql.ruby.regexp.RegExpTreeView::RegexTreeView as TreeView
12+
private import codeql.ruby.Regexp as RE
13+
private import codeql.ruby.Concepts
1214

1315
/**
1416
* Provides default sources, sinks and sanitizers for reasoning about
@@ -53,80 +55,22 @@ module PolynomialReDoS {
5355
*/
5456
class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { }
5557

56-
/**
57-
* Gets the AST of a regular expression object that can flow to `node`.
58-
*/
59-
RegExpTerm getRegExpObjectFromNode(DataFlow::Node node) {
60-
exists(DataFlow::LocalSourceNode regexp |
61-
regexp.flowsTo(node) and
62-
result = regexp.asExpr().(CfgNodes::ExprNodes::RegExpLiteralCfgNode).getExpr().getParsed()
63-
)
64-
}
65-
6658
/**
6759
* A regexp match against a superlinear backtracking term, seen as a sink for
6860
* polynomial regular expression denial-of-service vulnerabilities.
6961
*/
7062
class PolynomialBackTrackingTermMatch extends Sink {
7163
PolynomialBackTrackingTerm term;
72-
DataFlow::ExprNode matchNode;
64+
RegexExecution exec;
7365

7466
PolynomialBackTrackingTermMatch() {
75-
exists(DataFlow::Node regexp |
76-
term.getRootTerm() = getRegExpObjectFromNode(regexp) and
77-
(
78-
// `=~` or `!~`
79-
exists(CfgNodes::ExprNodes::BinaryOperationCfgNode op |
80-
matchNode.asExpr() = op and
81-
(
82-
op.getExpr() instanceof Ast::RegExpMatchExpr or
83-
op.getExpr() instanceof Ast::NoRegExpMatchExpr
84-
) and
85-
(
86-
this.asExpr() = op.getLeftOperand() and regexp.asExpr() = op.getRightOperand()
87-
or
88-
this.asExpr() = op.getRightOperand() and regexp.asExpr() = op.getLeftOperand()
89-
)
90-
)
91-
or
92-
// Any of the methods on `String` that take a regexp.
93-
exists(CfgNodes::ExprNodes::MethodCallCfgNode call |
94-
matchNode.asExpr() = call and
95-
call.getExpr().getMethodName() =
96-
[
97-
"[]", "gsub", "gsub!", "index", "match", "match?", "partition", "rindex",
98-
"rpartition", "scan", "slice!", "split", "sub", "sub!"
99-
] and
100-
this.asExpr() = call.getReceiver() and
101-
regexp.asExpr() = call.getArgument(0)
102-
)
103-
or
104-
// A call to `match` or `match?` where the regexp is the receiver.
105-
exists(CfgNodes::ExprNodes::MethodCallCfgNode call |
106-
matchNode.asExpr() = call and
107-
call.getExpr().getMethodName() = ["match", "match?"] and
108-
regexp.asExpr() = call.getReceiver() and
109-
this.asExpr() = call.getArgument(0)
110-
)
111-
or
112-
// a case-when statement
113-
exists(CfgNodes::ExprNodes::CaseExprCfgNode caseWhen |
114-
matchNode.asExpr() = caseWhen and
115-
this.asExpr() = caseWhen.getValue()
116-
|
117-
regexp.asExpr() =
118-
caseWhen.getBranch(_).(CfgNodes::ExprNodes::WhenClauseCfgNode).getPattern(_)
119-
or
120-
regexp.asExpr() =
121-
caseWhen.getBranch(_).(CfgNodes::ExprNodes::InClauseCfgNode).getPattern()
122-
)
123-
)
124-
)
67+
term.getRootTerm() = exec.getTerm() and
68+
this = exec.getString()
12569
}
12670

12771
override RegExpTerm getRegExp() { result = term }
12872

129-
override DataFlow::Node getHighlight() { result = matchNode }
73+
override DataFlow::Node getHighlight() { result = exec }
13074
}
13175

13276
private predicate lengthGuard(CfgNodes::AstCfgNode g, CfgNode node, boolean branch) {

0 commit comments

Comments
 (0)