Skip to content

Commit cf7311f

Browse files
committed
Swift: Expand parse mode support to include NSRegularExpression options.
1 parent cd1e73b commit cf7311f

File tree

3 files changed

+119
-28
lines changed

3 files changed

+119
-28
lines changed

swift/ql/lib/codeql/swift/regex/Regex.qll

Lines changed: 97 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -36,22 +36,23 @@ abstract class RegexCreation extends DataFlow::Node {
3636
* created from.
3737
*/
3838
abstract DataFlow::Node getStringInput();
39+
40+
/**
41+
* Gets a dataflow node for the options input that might contain parse mode
42+
* flags (if any).
43+
*/
44+
DataFlow::Node getOptionsInput() { none() }
3945
}
4046

4147
/**
42-
* A data-flow node where a `Regex` or `NSRegularExpression` object is created.
48+
* A data-flow node where a `Regex` object is created.
4349
*/
44-
private class StandardRegexCreation extends RegexCreation {
50+
private class RegexRegexCreation extends RegexCreation {
4551
DataFlow::Node input;
4652

47-
StandardRegexCreation() {
53+
RegexRegexCreation() {
4854
exists(CallExpr call |
49-
(
50-
call.getStaticTarget().(Method).hasQualifiedName("Regex", ["init(_:)", "init(_:as:)"]) or
51-
call.getStaticTarget()
52-
.(Method)
53-
.hasQualifiedName("NSRegularExpression", "init(pattern:options:)")
54-
) and
55+
call.getStaticTarget().(Method).hasQualifiedName("Regex", ["init(_:)", "init(_:as:)"]) and
5556
input.asExpr() = call.getArgument(0).getExpr() and
5657
this.asExpr() = call
5758
)
@@ -60,6 +61,29 @@ private class StandardRegexCreation extends RegexCreation {
6061
override DataFlow::Node getStringInput() { result = input }
6162
}
6263

64+
/**
65+
* A data-flow node where an `NSRegularExpression` object is created.
66+
*/
67+
private class NSRegularExpressionRegexCreation extends RegexCreation {
68+
DataFlow::Node input;
69+
70+
NSRegularExpressionRegexCreation() {
71+
exists(CallExpr call |
72+
call.getStaticTarget()
73+
.(Method)
74+
.hasQualifiedName("NSRegularExpression", "init(pattern:options:)") and
75+
input.asExpr() = call.getArgument(0).getExpr() and
76+
this.asExpr() = call
77+
)
78+
}
79+
80+
override DataFlow::Node getStringInput() { result = input }
81+
82+
override DataFlow::Node getOptionsInput() {
83+
result.asExpr() = this.asExpr().(CallExpr).getArgument(1).getExpr()
84+
}
85+
}
86+
6387
newtype TRegexParseMode =
6488
MkIgnoreCase() or // case insensitive
6589
MkVerbose() or // ignores whitespace and `#` comments within patterns
@@ -94,25 +118,29 @@ class RegexAdditionalFlowStep extends Unit {
94118
abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo);
95119

96120
/**
97-
* Holds if the step from `node1` to `node2` either sets (`isSet` = true)
98-
* or unsets (`isSet` = false) parse mode `mode` for the regular expression.
121+
* Holds if a regular expression parse mode is either set (`isSet` = true)
122+
* or unset (`isSet` = false) at `node`. Parse modes propagate through
123+
* array construction and regex construction.
99124
*/
100-
abstract predicate modifiesParseMode(
101-
DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet
102-
);
125+
abstract predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet);
103126
}
104127

105128
/**
106-
* An additional flow step for `Regex` or `NSRegularExpression`.
129+
* An additional flow step for `Regex`.
107130
*/
108-
class StandardRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
131+
class RegexRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
109132
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
110-
this.modifiesParseMode(nodeFrom, nodeTo, _, _)
133+
this.setsParseModeEdge(nodeFrom, nodeTo, _, _)
111134
}
112135

113-
override predicate modifiesParseMode(
136+
override predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet) {
137+
this.setsParseModeEdge(_, node, mode, isSet)
138+
}
139+
140+
private predicate setsParseModeEdge(
114141
DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet
115142
) {
143+
// `Regex` methods that modify parse mode
116144
exists(CallExpr ce |
117145
nodeFrom.asExpr() = ce.getQualifier() and
118146
nodeTo.asExpr() = ce and
@@ -135,6 +163,56 @@ class StandardRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
135163
}
136164
}
137165

166+
/**
167+
* An additional flow step for `NSRegularExpression`.
168+
*/
169+
class StandardRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
170+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { none() }
171+
172+
override predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet) {
173+
// `NSRegularExpression.Options` values
174+
node.asExpr()
175+
.(MemberRefExpr)
176+
.getMember()
177+
.(FieldDecl)
178+
.hasQualifiedName("NSRegularExpression.Options", "caseInsensitive") and
179+
mode = MkIgnoreCase() and
180+
isSet = true
181+
or
182+
node.asExpr()
183+
.(MemberRefExpr)
184+
.getMember()
185+
.(FieldDecl)
186+
.hasQualifiedName("NSRegularExpression.Options", "allowCommentsAndWhitespace") and
187+
mode = MkVerbose() and
188+
isSet = true
189+
or
190+
node.asExpr()
191+
.(MemberRefExpr)
192+
.getMember()
193+
.(FieldDecl)
194+
.hasQualifiedName("NSRegularExpression.Options", "dotMatchesLineSeparators") and
195+
mode = MkDotAll() and
196+
isSet = true
197+
or
198+
node.asExpr()
199+
.(MemberRefExpr)
200+
.getMember()
201+
.(FieldDecl)
202+
.hasQualifiedName("NSRegularExpression.Options", "anchorsMatchLines") and
203+
mode = MkMultiLine() and
204+
isSet = true
205+
or
206+
node.asExpr()
207+
.(MemberRefExpr)
208+
.getMember()
209+
.(FieldDecl)
210+
.hasQualifiedName("NSRegularExpression.Options", "useUnicodeWordBoundaries") and
211+
mode = MkUnicode() and
212+
isSet = true
213+
}
214+
}
215+
138216
/**
139217
* A call that evaluates a regular expression. For example, the call to `firstMatch` in:
140218
* ```
@@ -174,7 +252,7 @@ abstract class RegexEval extends CallExpr {
174252
RegexParseMode getAParseMode() {
175253
exists(DataFlow::Node setNode |
176254
// parse mode flag is set
177-
any(RegexAdditionalFlowStep s).modifiesParseMode(_, setNode, result, true) and
255+
any(RegexAdditionalFlowStep s).setsParseMode(setNode, result, true) and
178256
// reaches this eval
179257
RegexParseModeFlow::flow(setNode, DataFlow::exprNode(this.getRegexInput()))
180258
)

swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,15 @@ module RegexUseFlow = DataFlow::Global<RegexUseConfig>;
5353

5454
/**
5555
* A data flow configuration for tracking regular expression parse mode
56-
* flags from the point they are set to the point of use. The flow state
57-
* encodes which parse mode flag was set.
56+
* flags from wherever they are created or set through to regular expression
57+
* evaluation. The flow state encodes which parse mode flag was set.
5858
*/
5959
private module RegexParseModeConfig implements DataFlow::StateConfigSig {
6060
class FlowState = RegexParseMode;
6161

6262
predicate isSource(DataFlow::Node node, FlowState flowstate) {
6363
// parse mode flag is set
64-
any(RegexAdditionalFlowStep s).modifiesParseMode(_, node, flowstate, true)
64+
any(RegexAdditionalFlowStep s).setsParseMode(node, flowstate, true)
6565
}
6666

6767
predicate isSink(DataFlow::Node node, FlowState flowstate) {
@@ -73,11 +73,24 @@ private module RegexParseModeConfig implements DataFlow::StateConfigSig {
7373
predicate isBarrier(DataFlow::Node node) { none() }
7474

7575
predicate isBarrier(DataFlow::Node node, FlowState flowstate) {
76-
// parse mode flag is set or unset
77-
any(RegexAdditionalFlowStep s).modifiesParseMode(node, _, flowstate, _)
76+
// parse mode flag is unset
77+
any(RegexAdditionalFlowStep s).setsParseMode(node, flowstate, false)
7878
}
7979

8080
predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
81+
// flow through array construction
82+
exists(ArrayExpr arr |
83+
nodeFrom.asExpr() = arr.getAnElement() and
84+
nodeTo.asExpr() = arr
85+
)
86+
or
87+
// flow through regex creation
88+
exists(RegexCreation create |
89+
nodeFrom = create.getOptionsInput() and
90+
nodeTo = create
91+
)
92+
or
93+
// additional flow steps for regular expression objects
8194
any(RegexAdditionalFlowStep s).step(nodeFrom, nodeTo)
8295
}
8396

swift/ql/test/library-tests/regex/regex.swift

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -227,12 +227,12 @@ func myRegexpMethodsTests(b: Bool, str_unknown: String) throws {
227227
_ = try Regex("abc").anchorsMatchLineEndings().firstMatch(in: input) // $ input=input regex=abc modes=MULTILINE
228228

229229
// parse modes set through NSRegularExpression
230-
_ = try NSRegularExpression(pattern: ".*", options: .caseInsensitive).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=IGNORECASE
231-
_ = try NSRegularExpression(pattern: ".*", options: .dotMatchesLineSeparators).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes=DOTALL
232-
_ = try NSRegularExpression(pattern: ".*", options: [.caseInsensitive, .dotMatchesLineSeparators]).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes="DOTALL | IGNORECASE"
230+
_ = try NSRegularExpression(pattern: ".*", options: .caseInsensitive).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input modes=IGNORECASE
231+
_ = try NSRegularExpression(pattern: ".*", options: .dotMatchesLineSeparators).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input modes=DOTALL
232+
_ = try NSRegularExpression(pattern: ".*", options: [.caseInsensitive, .dotMatchesLineSeparators]).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input modes="DOTALL | IGNORECASE"
233233

234234
let myOptions1 : NSRegularExpression.Options = [.caseInsensitive, .dotMatchesLineSeparators]
235-
_ = try NSRegularExpression(pattern: ".*", options: myOptions1).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input MISSING: modes="DOTALL | IGNORECASE"
235+
_ = try NSRegularExpression(pattern: ".*", options: myOptions1).firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input modes="DOTALL | IGNORECASE"
236236

237237
// parse modes set through other methods
238238

0 commit comments

Comments
 (0)