Skip to content

Commit 6062fbb

Browse files
authored
Merge pull request github#14383 from geoffw0/nsstringregex
Swift: Add regular expression evaluation models for StringProtocol and NSString methods
2 parents d534c93 + 2a552d9 commit 6062fbb

File tree

11 files changed

+559
-312
lines changed

11 files changed

+559
-312
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
5+
* Added models of `StringProtocol` and `NSString` methods that evaluate regular expressions.

swift/ql/lib/codeql/swift/regex/Regex.qll

Lines changed: 176 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,15 @@ abstract class RegexCreation extends DataFlow::Node {
6969
abstract DataFlow::Node getStringInput();
7070

7171
/**
72-
* Gets a dataflow node for the options input that might contain parse mode
73-
* flags (if any).
72+
* Gets a dataflow node for an options input that might contain options
73+
* such as parse mode flags (if any).
7474
*/
75-
DataFlow::Node getOptionsInput() { none() }
75+
DataFlow::Node getAnOptionsInput() { none() }
76+
77+
/**
78+
* DEPRECATED: Use `getAnOptionsInput()` instead.
79+
*/
80+
deprecated DataFlow::Node getOptionsInput() { result = this.getAnOptionsInput() }
7681
}
7782

7883
/**
@@ -110,7 +115,7 @@ private class NSRegularExpressionRegexCreation extends RegexCreation {
110115

111116
override DataFlow::Node getStringInput() { result = input }
112117

113-
override DataFlow::Node getOptionsInput() {
118+
override DataFlow::Node getAnOptionsInput() {
114119
result.asExpr() = this.asExpr().(CallExpr).getArgument(1).getExpr()
115120
}
116121
}
@@ -121,7 +126,8 @@ private newtype TRegexParseMode =
121126
MkDotAll() or // dot matches all characters, including line terminators
122127
MkMultiLine() or // `^` and `$` also match beginning and end of lines
123128
MkUnicodeBoundary() or // Unicode UAX 29 word boundary mode
124-
MkUnicode() // Unicode matching
129+
MkUnicode() or // Unicode matching
130+
MkAnchoredStart() // match must begin at start of string
125131

126132
/**
127133
* A regular expression parse mode flag.
@@ -142,6 +148,8 @@ class RegexParseMode extends TRegexParseMode {
142148
this = MkUnicodeBoundary() and result = "UNICODEBOUNDARY"
143149
or
144150
this = MkUnicode() and result = "UNICODE"
151+
or
152+
this = MkAnchoredStart() and result = "ANCHOREDSTART"
145153
}
146154

147155
/**
@@ -207,9 +215,9 @@ class RegexRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
207215
}
208216

209217
/**
210-
* An additional flow step for `NSRegularExpression`.
218+
* An additional flow step for `NSRegularExpression.Options`.
211219
*/
212-
class NSRegularExpressionRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
220+
private class NSRegularExpressionRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
213221
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { none() }
214222

215223
override predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet) {
@@ -257,6 +265,34 @@ class NSRegularExpressionRegexAdditionalFlowStep extends RegexAdditionalFlowStep
257265
}
258266
}
259267

268+
/**
269+
* An additional flow step for `NSString.CompareOptions`.
270+
*/
271+
private class NSStringRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
272+
override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { none() }
273+
274+
override predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet) {
275+
// `NSString.CompareOptions` values (these are typically combined with
276+
// `NSString.CompareOptions.regularExpression`, then passed into a `StringProtocol`
277+
// or `NSString` method).
278+
node.asExpr()
279+
.(MemberRefExpr)
280+
.getMember()
281+
.(FieldDecl)
282+
.hasQualifiedName("NSString.CompareOptions", "caseInsensitive") and
283+
mode = MkIgnoreCase() and
284+
isSet = true
285+
or
286+
node.asExpr()
287+
.(MemberRefExpr)
288+
.getMember()
289+
.(FieldDecl)
290+
.hasQualifiedName("NSString.CompareOptions", "anchored") and
291+
mode = MkAnchoredStart() and
292+
isSet = true
293+
}
294+
}
295+
260296
/**
261297
* A call that evaluates a regular expression. For example, the call to `firstMatch` in:
262298
* ```
@@ -265,27 +301,46 @@ class NSRegularExpressionRegexAdditionalFlowStep extends RegexAdditionalFlowStep
265301
*/
266302
abstract class RegexEval extends CallExpr {
267303
/**
268-
* Gets the input to this call that is the regular expression being evaluated. This may
269-
* be a regular expression object or a string literal.
304+
* Gets the input to this call that is the regular expression being evaluated.
305+
* This may be a regular expression object or a string literal.
306+
*
307+
* Consider using `getARegex()` instead (which tracks the regular expression
308+
* input back to its source).
309+
*/
310+
abstract DataFlow::Node getRegexInputNode();
311+
312+
/**
313+
* DEPRECATED: Use `getRegexInputNode()` instead.
270314
*/
271-
abstract Expr getRegexInput();
315+
deprecated Expr getRegexInput() { result = this.getRegexInputNode().asExpr() }
272316

273317
/**
274318
* Gets the input to this call that is the string the regular expression is evaluated on.
275319
*/
276-
abstract Expr getStringInput();
320+
abstract DataFlow::Node getStringInputNode();
321+
322+
/**
323+
* DEPRECATED: Use `getStringInputNode()` instead.
324+
*/
325+
deprecated Expr getStringInput() { result = this.getStringInputNode().asExpr() }
326+
327+
/**
328+
* Gets a dataflow node for an options input that might contain options such
329+
* as parse mode flags (if any).
330+
*/
331+
DataFlow::Node getAnOptionsInput() { none() }
277332

278333
/**
279334
* Gets a regular expression value that is evaluated here (if any can be identified).
280335
*/
281336
RegExp getARegex() {
282337
// string literal used directly as a regex
283-
DataFlow::exprNode(result).(ParsedStringRegex).getAParse().asExpr() = this.getRegexInput()
338+
DataFlow::exprNode(result).(ParsedStringRegex).getAParse() = this.getRegexInputNode()
284339
or
285340
// string literal -> regex object -> use
286341
exists(RegexCreation regexCreation |
287342
DataFlow::exprNode(result).(ParsedStringRegex).getAParse() = regexCreation.getStringInput() and
288-
RegexUseFlow::flow(regexCreation, DataFlow::exprNode(this.getRegexInput()))
343+
RegexUseFlow::flow(regexCreation, this.getRegexInputNode())
289344
)
290345
}
291346

@@ -298,7 +353,10 @@ abstract class RegexEval extends CallExpr {
298353
// parse mode flag is set
299354
any(RegexAdditionalFlowStep s).setsParseMode(setNode, result, true) and
300355
// reaches this eval
301-
RegexParseModeFlow::flow(setNode, DataFlow::exprNode(this.getRegexInput()))
356+
(
357+
RegexParseModeFlow::flow(setNode, this.getRegexInputNode()) or
358+
RegexParseModeFlow::flow(setNode, this.getAnOptionsInput())
359+
)
302360
)
303361
}
304362
}
@@ -307,15 +365,15 @@ abstract class RegexEval extends CallExpr {
307365
* A call to a function that always evaluates a regular expression.
308366
*/
309367
private class AlwaysRegexEval extends RegexEval {
310-
Expr regexInput;
311-
Expr stringInput;
368+
DataFlow::Node regexInput;
369+
DataFlow::Node stringInput;
312370

313371
AlwaysRegexEval() {
314372
this.getStaticTarget()
315373
.(Method)
316374
.hasQualifiedName("Regex", ["firstMatch(in:)", "prefixMatch(in:)", "wholeMatch(in:)"]) and
317-
regexInput = this.getQualifier() and
318-
stringInput = this.getArgument(0).getExpr()
375+
regexInput.asExpr() = this.getQualifier() and
376+
stringInput.asExpr() = this.getArgument(0).getExpr()
319377
or
320378
this.getStaticTarget()
321379
.(Method)
@@ -327,8 +385,8 @@ private class AlwaysRegexEval extends RegexEval {
327385
"replaceMatches(in:options:range:withTemplate:)",
328386
"stringByReplacingMatches(in:options:range:withTemplate:)"
329387
]) and
330-
regexInput = this.getQualifier() and
331-
stringInput = this.getArgument(0).getExpr()
388+
regexInput.asExpr() = this.getQualifier() and
389+
stringInput.asExpr() = this.getArgument(0).getExpr()
332390
or
333391
this.getStaticTarget()
334392
.(Method)
@@ -339,8 +397,8 @@ private class AlwaysRegexEval extends RegexEval {
339397
"split(separator:maxSplits:omittingEmptySubsequences:)", "starts(with:)",
340398
"trimmingPrefix(_:)", "wholeMatch(of:)"
341399
]) and
342-
regexInput = this.getArgument(0).getExpr() and
343-
stringInput = this.getQualifier()
400+
regexInput.asExpr() = this.getArgument(0).getExpr() and
401+
stringInput.asExpr() = this.getQualifier()
344402
or
345403
this.getStaticTarget()
346404
.(Method)
@@ -351,11 +409,103 @@ private class AlwaysRegexEval extends RegexEval {
351409
"replacing(_:with:maxReplacements:)", "replacing(_:with:subrange:maxReplacements:)",
352410
"trimPrefix(_:)"
353411
]) and
354-
regexInput = this.getArgument(0).getExpr() and
355-
stringInput = this.getQualifier()
412+
regexInput.asExpr() = this.getArgument(0).getExpr() and
413+
stringInput.asExpr() = this.getQualifier()
356414
}
357415

358-
override Expr getRegexInput() { result = regexInput }
416+
override DataFlow::Node getRegexInputNode() { result = regexInput }
359417

360-
override Expr getStringInput() { result = stringInput }
418+
override DataFlow::Node getStringInputNode() { result = stringInput }
419+
}
420+
421+
/**
422+
* A call to a function that sometimes evaluates a regular expression, if
423+
* `NSString.CompareOptions.regularExpression` is set as an `options` argument.
424+
*
425+
* This is a helper class for `NSStringCompareOptionsRegexEval`.
426+
*/
427+
private class NSStringCompareOptionsPotentialRegexEval extends CallExpr {
428+
DataFlow::Node regexInput;
429+
DataFlow::Node stringInput;
430+
DataFlow::Node optionsInput;
431+
432+
NSStringCompareOptionsPotentialRegexEval() {
433+
(
434+
this.getStaticTarget()
435+
.(Method)
436+
.hasQualifiedName("StringProtocol",
437+
["range(of:options:range:locale:)", "replacingOccurrences(of:with:options:range:)"])
438+
or
439+
this.getStaticTarget()
440+
.(Method)
441+
.hasQualifiedName("NSString",
442+
[
443+
"range(of:options:)", "range(of:options:range:)", "range(of:options:range:locale:)",
444+
"replacingOccurrences(of:with:options:range:)"
445+
])
446+
) and
447+
regexInput.asExpr() = this.getArgument(0).getExpr() and
448+
stringInput.asExpr() = this.getQualifier() and
449+
optionsInput.asExpr() = this.getArgumentWithLabel("options").getExpr()
450+
}
451+
452+
DataFlow::Node getRegexInput() { result = regexInput }
453+
454+
DataFlow::Node getStringInput() { result = stringInput }
455+
456+
DataFlow::Node getAnOptionsInput() { result = optionsInput }
457+
}
458+
459+
/**
460+
* A data flow configuration for tracking `NSString.CompareOptions.regularExpression`
461+
* values from where they are created to the point of use.
462+
*/
463+
private module NSStringCompareOptionsFlagConfig implements DataFlow::ConfigSig {
464+
predicate isSource(DataFlow::Node node) {
465+
// creation of a `NSString.CompareOptions.regularExpression` value
466+
node.asExpr()
467+
.(MemberRefExpr)
468+
.getMember()
469+
.(FieldDecl)
470+
.hasQualifiedName("NSString.CompareOptions", "regularExpression")
471+
}
472+
473+
predicate isSink(DataFlow::Node node) {
474+
// use in a [potential] regex eval `options` argument
475+
any(NSStringCompareOptionsPotentialRegexEval potentialEval).getAnOptionsInput() = node
476+
}
477+
478+
predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) {
479+
// flow out from collection content at the sink.
480+
isSink(node) and
481+
c.getAReadContent() instanceof DataFlow::Content::CollectionContent
482+
}
483+
}
484+
485+
module NSStringCompareOptionsFlagFlow = DataFlow::Global<NSStringCompareOptionsFlagConfig>;
486+
487+
/**
488+
* A call to a function that evaluates a regular expression because
489+
* `NSString.CompareOptions.regularExpression` is set as an `options` argument.
490+
*/
491+
private class NSStringCompareOptionsRegexEval extends RegexEval instanceof NSStringCompareOptionsPotentialRegexEval
492+
{
493+
NSStringCompareOptionsRegexEval() {
494+
// check there is flow from an `NSString.CompareOptions.regularExpression` value to an `options` argument;
495+
// if there isn't, the input won't be interpreted as a regular expression.
496+
NSStringCompareOptionsFlagFlow::flow(_,
497+
this.(NSStringCompareOptionsPotentialRegexEval).getAnOptionsInput())
498+
}
499+
500+
override DataFlow::Node getRegexInputNode() {
501+
result = this.(NSStringCompareOptionsPotentialRegexEval).getRegexInput()
502+
}
503+
504+
override DataFlow::Node getStringInputNode() {
505+
result = this.(NSStringCompareOptionsPotentialRegexEval).getStringInput()
506+
}
507+
508+
override DataFlow::Node getAnOptionsInput() {
509+
result = this.(NSStringCompareOptionsPotentialRegexEval).getAnOptionsInput()
510+
}
361511
}

swift/ql/lib/codeql/swift/regex/internal/RegexTracking.qll

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ private module StringLiteralUseConfig implements DataFlow::ConfigSig {
2020

2121
predicate isSink(DataFlow::Node node) {
2222
// evaluated directly as a regular expression
23-
node.asExpr() = any(RegexEval eval).getRegexInput()
23+
node = any(RegexEval eval).getRegexInputNode()
2424
or
2525
// used to create a regular expression object
2626
node = any(RegexCreation regexCreation).getStringInput()
@@ -41,7 +41,7 @@ private module RegexUseConfig implements DataFlow::ConfigSig {
4141

4242
predicate isSink(DataFlow::Node node) {
4343
// evaluation of the regex
44-
node.asExpr() = any(RegexEval eval).getRegexInput()
44+
node = any(RegexEval eval).getRegexInputNode()
4545
}
4646

4747
predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
@@ -65,8 +65,11 @@ private module RegexParseModeConfig implements DataFlow::StateConfigSig {
6565
}
6666

6767
predicate isSink(DataFlow::Node node, FlowState flowstate) {
68-
// evaluation of the regex
69-
node.asExpr() = any(RegexEval eval).getRegexInput() and
68+
// evaluation of a regex
69+
(
70+
node = any(RegexEval eval).getRegexInputNode() or
71+
node = any(RegexEval eval).getAnOptionsInput()
72+
) and
7073
exists(flowstate)
7174
}
7275

@@ -86,7 +89,7 @@ private module RegexParseModeConfig implements DataFlow::StateConfigSig {
8689
or
8790
// flow through regex creation
8891
exists(RegexCreation create |
89-
nodeFrom = create.getOptionsInput() and
92+
nodeFrom = create.getAnOptionsInput() and
9093
nodeTo = create
9194
)
9295
or

swift/ql/lib/codeql/swift/security/regex/RegexInjectionExtensions.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class RegexInjectionAdditionalFlowStep extends Unit {
3737
* These cases are modeled separately.
3838
*/
3939
private class EvalRegexInjectionSink extends RegexInjectionSink {
40-
EvalRegexInjectionSink() { this.asExpr() = any(RegexEval e).getRegexInput() }
40+
EvalRegexInjectionSink() { this = any(RegexEval e).getRegexInputNode() }
4141
}
4242

4343
/**

0 commit comments

Comments
 (0)