Skip to content

Commit 6f5d58c

Browse files
authored
Merge pull request github#13770 from geoffw0/parsemode3
Swift: Track regular expression parse modes set in code
2 parents eb0b485 + b914686 commit 6f5d58c

File tree

6 files changed

+411
-151
lines changed

6 files changed

+411
-151
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* The regular expression library now understands mode flags specified by `Regex` methods and the `NSRegularExpression` initializer.

swift/ql/lib/codeql/swift/regex/Regex.qll

Lines changed: 185 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,28 +36,191 @@ abstract class RegexCreation extends DataFlow::Node {
3636
* created from.
3737
*/
3838
abstract DataFlow::Node getStringInput();
39+
40+
/**
41+
* Gets a data-flow node for the options input that might contain parse mode
42+
* flags (if any). This default implementation has no result.
43+
*/
44+
DataFlow::Node getOptionsInput() { none() }
3945
}
4046

4147
/**
42-
* A data-flow node where a `Regex` or `NSRegularExpression` object is created.
48+
* A data-flow node where a `Regex` object is created.
4349
*/
44-
private class StandardRegexCreation extends RegexCreation {
50+
private class RegexRegexCreation extends RegexCreation {
4551
DataFlow::Node input;
4652

47-
StandardRegexCreation() {
53+
RegexRegexCreation() {
4854
exists(CallExpr call |
49-
(
50-
call.getStaticTarget().(Method).hasQualifiedName("Regex", ["init(_:)", "init(_:as:)"]) or
51-
call.getStaticTarget()
52-
.(Method)
53-
.hasQualifiedName("NSRegularExpression", "init(pattern:options:)")
54-
) and
55+
call.getStaticTarget().(Method).hasQualifiedName("Regex", ["init(_:)", "init(_:as:)"]) and
56+
input.asExpr() = call.getArgument(0).getExpr() and
57+
this.asExpr() = call
58+
)
59+
}
60+
61+
override DataFlow::Node getStringInput() { result = input }
62+
}
63+
64+
/**
 * A data-flow node at which an `NSRegularExpression` object is created by a
 * call to `NSRegularExpression.init(pattern:options:)`.
 */
private class NSRegularExpressionRegexCreation extends RegexCreation {
  DataFlow::Node input;

  NSRegularExpressionRegexCreation() {
    exists(CallExpr initCall |
      this.asExpr() = initCall and
      input.asExpr() = initCall.getArgument(0).getExpr() and
      initCall
          .getStaticTarget()
          .(Method)
          .hasQualifiedName("NSRegularExpression", "init(pattern:options:)")
    )
  }

  /** Gets the node for the first argument of the initializer call. */
  override DataFlow::Node getStringInput() { result = input }

  /** Gets the node for the second argument of the initializer call. */
  override DataFlow::Node getOptionsInput() {
    exists(CallExpr initCall |
      initCall = this.asExpr() and
      result.asExpr() = initCall.getArgument(1).getExpr()
    )
  }
}
86+
87+
private newtype TRegexParseMode =
  // case insensitive
  MkIgnoreCase() or
  // ignores whitespace and `#` comments within patterns
  MkVerbose() or
  // dot matches all characters, including line terminators
  MkDotAll() or
  // `^` and `$` also match beginning and end of lines
  MkMultiLine() or
  // Unicode UAX 29 word boundary mode
  MkUnicode()
93+
94+
/**
 * A parse mode flag for a regular expression.
 */
class RegexParseMode extends TRegexParseMode {
  /**
   * Gets the name of this parse mode flag, which is one of `IGNORECASE`,
   * `VERBOSE`, `DOTALL`, `MULTILINE` or `UNICODE`.
   */
  string getName() {
    result = "IGNORECASE" and this = MkIgnoreCase()
    or
    result = "VERBOSE" and this = MkVerbose()
    or
    result = "DOTALL" and this = MkDotAll()
    or
    result = "MULTILINE" and this = MkMultiLine()
    or
    result = "UNICODE" and this = MkUnicode()
  }

  /**
   * Gets a textual representation of this `RegexParseMode` (its name).
   */
  string toString() { result = this.getName() }
}
118+
119+
/**
120+
* A unit class for adding additional flow steps for regular expressions.
121+
*/
122+
class RegexAdditionalFlowStep extends Unit {
123+
/**
124+
* Holds if the step from `nodeFrom` to `nodeTo` should be considered a flow
125+
* step for regular expressions.
126+
*/
127+
abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo);
128+
129+
/**
130+
* Holds if a regular expression parse mode is either set (`isSet` = true)
131+
* or unset (`isSet` = false) at `node`. Parse modes propagate through
132+
* array construction and regex construction.
133+
*/
134+
abstract predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet);
135+
}
136+
137+
/**
 * Additional flow steps and parse mode changes for `Regex` methods that
 * modify the parse mode of an existing `Regex` object.
 */
class RegexRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    this.setsParseModeEdge(nodeFrom, nodeTo, _, _)
  }

  override predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet) {
    this.setsParseModeEdge(_, node, mode, isSet)
  }

  /**
   * Holds if `nodeTo` is a call to a mode-changing `Regex` method whose
   * qualifier is `nodeFrom`, where the method controls `mode`. `isSet` is
   * `false` when the first argument is the boolean literal `false`, and
   * `true` in every other case.
   */
  private predicate setsParseModeEdge(
    DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet
  ) {
    exists(CallExpr modeCall, string methodName |
      modeCall.getStaticTarget().(Method).hasQualifiedName("Regex", methodName) and
      nodeTo.asExpr() = modeCall and
      nodeFrom.asExpr() = modeCall.getQualifier()
    |
      // which parse mode the called method controls
      (
        methodName = "ignoresCase(_:)" and mode = MkIgnoreCase()
        or
        methodName = "dotMatchesNewlines(_:)" and mode = MkDotAll()
        or
        methodName = "anchorsMatchLineEndings(_:)" and mode = MkMultiLine()
      ) and
      // which value the mode is given
      (
        // explicitly set to the literal `false`
        modeCall.getArgument(0).getExpr().(BooleanLiteralExpr).getValue() = false and
        isSet = false
        or
        // set to `true`, left at the default (= true), or set to an unknown value
        not modeCall.getArgument(0).getExpr().(BooleanLiteralExpr).getValue() = false and
        isSet = true
      )
    )
  }
}
174+
175+
/**
 * Parse mode sources for `NSRegularExpression`. This class contributes no
 * additional flow steps of its own.
 */
class NSRegularExpressionRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { none() }

  /**
   * Holds if `node` is a reference to a member of `NSRegularExpression.Options`
   * that corresponds to `mode`. Such references only ever switch a mode on, so
   * `isSet` is always `true`. (These values are typically combined, then
   * passed into the `NSRegularExpression` initializer.)
   */
  override predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet) {
    exists(string optionName |
      // the member reference is matched once, rather than once per option
      node.asExpr()
          .(MemberRefExpr)
          .getMember()
          .(FieldDecl)
          .hasQualifiedName("NSRegularExpression.Options", optionName)
    |
      optionName = "caseInsensitive" and mode = MkIgnoreCase()
      or
      optionName = "allowCommentsAndWhitespace" and mode = MkVerbose()
      or
      optionName = "dotMatchesLineSeparators" and mode = MkDotAll()
      or
      optionName = "anchorsMatchLines" and mode = MkMultiLine()
      or
      optionName = "useUnicodeWordBoundaries" and mode = MkUnicode()
    ) and
    isSet = true
  }
}
62225

63226
/**
@@ -91,6 +254,19 @@ abstract class RegexEval extends CallExpr {
91254
RegexUseFlow::flow(regexCreation, DataFlow::exprNode(this.getRegexInput()))
92255
)
93256
}
257+
258+
/**
 * Gets a parse mode that is set at this evaluation, that is, a mode that is
 * set at some node from which there is parse mode flow to the regular
 * expression input of this evaluation (along at least one path).
 */
RegexParseMode getAParseMode() {
  exists(DataFlow::Node modeSource, RegexAdditionalFlowStep s |
    // the flag is switched on at `modeSource`
    s.setsParseMode(modeSource, result, true) and
    // and `modeSource` reaches the regex input of this evaluation
    RegexParseModeFlow::flow(modeSource, DataFlow::exprNode(this.getRegexInput()))
  )
}
94270
}
95271

96272
/**

swift/ql/lib/codeql/swift/regex/internal/ParseRegex.qll

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
*/
77

88
import swift
9+
private import RegexTracking
10+
private import codeql.swift.regex.Regex
911

1012
/**
1113
* An `Expr` containing a regular expression term, that is, either
@@ -317,14 +319,24 @@ abstract class RegExp extends Expr {
317319
}
318320

319321
/**
320-
* Gets a mode (if any) of this regular expression. Can be any of:
322+
* Gets a mode (if any) of this regular expression in any evaluation. Can be
323+
* any of:
321324
* IGNORECASE
322325
* VERBOSE
323326
* DOTALL
324327
* MULTILINE
325328
* UNICODE
326329
*/
327-
string getAMode() { result = this.getModeFromPrefix() }
330+
string getAMode() {
  // mode flags applied to the regex object before some evaluation of it
  result = any(RegexEval e | e.getARegex() = this).getAParseMode().getName()
  or
  // mode flags written inside the regex string itself
  result = this.getModeFromPrefix()
}
328340

329341
/**
330342
* Holds if the `i`th character could not be parsed.

swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import swift
88
import codeql.swift.regex.RegexTreeView
9-
private import codeql.swift.dataflow.DataFlow
9+
import codeql.swift.dataflow.DataFlow
1010
private import ParseRegex
1111
private import codeql.swift.regex.Regex
1212

@@ -37,7 +37,6 @@ private module RegexUseConfig implements DataFlow::ConfigSig {
3737
predicate isSource(DataFlow::Node node) {
3838
// creation of the regex
3939
node instanceof RegexCreation
40-
// TODO: track parse mode flags.
4140
}
4241

4342
predicate isSink(DataFlow::Node node) {
@@ -46,9 +45,60 @@ private module RegexUseConfig implements DataFlow::ConfigSig {
4645
}
4746

4847
predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
49-
// TODO: flow through regex methods that return a modified regex.
50-
none()
48+
any(RegexAdditionalFlowStep s).step(nodeFrom, nodeTo)
5149
}
5250
}
5351

5452
/**
 * The global data flow computation for `RegexUseConfig`.
 */
module RegexUseFlow = DataFlow::Global<RegexUseConfig>;
53+
54+
/**
 * A data flow configuration that follows regular expression parse mode flags
 * from the nodes where they are set through to the regular expression input
 * of an evaluation. The flow state records which parse mode flag is tracked.
 */
private module RegexParseModeConfig implements DataFlow::StateConfigSig {
  class FlowState = RegexParseMode;

  predicate isSource(DataFlow::Node node, FlowState flowstate) {
    // a node at which the flag `flowstate` is switched on
    exists(RegexAdditionalFlowStep s | s.setsParseMode(node, flowstate, true))
  }

  predicate isSink(DataFlow::Node node, FlowState flowstate) {
    // the regex input of any evaluation, whichever flag is being tracked
    exists(flowstate) and
    exists(RegexEval eval | eval.getRegexInput() = node.asExpr())
  }

  predicate isBarrier(DataFlow::Node node) { none() }

  predicate isBarrier(DataFlow::Node node, FlowState flowstate) {
    // a node at which the flag `flowstate` is switched off
    exists(RegexAdditionalFlowStep s | s.setsParseMode(node, flowstate, false))
  }

  predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    // steps contributed by `RegexAdditionalFlowStep` implementations
    exists(RegexAdditionalFlowStep s | s.step(nodeFrom, nodeTo))
    or
    // from the options input of a regex creation to the creation itself
    nodeFrom = nodeTo.(RegexCreation).getOptionsInput()
    or
    // from an element of an array expression to the array expression
    nodeFrom.asExpr() = nodeTo.asExpr().(ArrayExpr).getAnElement()
  }

  predicate isAdditionalFlowStep(
    DataFlow::Node nodeFrom, FlowState flowstateFrom, DataFlow::Node nodeTo, FlowState flowStateTo
  ) {
    // no step changes which flag is being tracked
    none()
  }
}
103+
104+
/**
 * The global data flow computation, with flow state, for `RegexParseModeConfig`.
 */
module RegexParseModeFlow = DataFlow::GlobalWithState<RegexParseModeConfig>;

0 commit comments

Comments
 (0)